Search | arXiv e-print repository

<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 352 results for author: <span class="mathjax">Cui, S</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Cui%2C+S">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Cui, S"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Cui%2C+S&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Cui, S"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Cui%2C+S&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Cui%2C+S&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Cui%2C+S&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Cui%2C+S&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Cui%2C+S&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Cui%2C+S&amp;start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06681">arXiv:2411.06681</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.06681">pdf</a>, <a href="https://arxiv.org/format/2411.06681">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> WDMoE: Wireless Distributed Mixture of Experts for Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xue%2C+N">Nan Xue</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+Y">Yaping Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zhiyong Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Tao%2C+M">Meixia Tao</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+X">Xiaodong Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Qian%2C+L">Liang Qian</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shuguang Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+W">Wenjun Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+P">Ping Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2411.06681v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) have achieved significant success in various natural language processing tasks, but the role of wireless networks in supporting LLMs has not been thoroughly explored. In this paper, we propose a wireless distributed Mixture of Experts (WDMoE) architecture to enable collaborative deployment of LLMs across edge servers at the base station (BS) and mobile devices in wirel&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06681v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06681v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06681v1-abstract-full" style="display: none;"> Large Language Models (LLMs) have achieved significant success in various natural language processing tasks, but the role of wireless networks in supporting LLMs has not been thoroughly explored. In this paper, we propose a wireless distributed Mixture of Experts (WDMoE) architecture to enable collaborative deployment of LLMs across edge servers at the base station (BS) and mobile devices in wireless networks. Specifically, we decompose the MoE layer in LLMs by placing the gating network and the preceding neural network layer at BS, while distributing the expert networks among the devices. This deployment leverages the parallel inference capabilities of expert networks on mobile devices, effectively utilizing the limited computing and caching resources of these devices. Accordingly, we develop a performance metric for WDMoE-based LLMs, which accounts for both model capability and latency. To minimize the latency while maintaining accuracy, we jointly optimize expert selection and bandwidth allocation based on the performance metric. Moreover, we build a hardware testbed using NVIDIA Jetson kits to validate the effectiveness of WDMoE. Both theoretical simulations and practical hardware experiments demonstrate that the proposed method can significantly reduce the latency without compromising LLM performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06681v1-abstract-full').style.display = 'none'; document.getElementById('2411.06681v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.03127">arXiv:2411.03127</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.03127">pdf</a>, <a href="https://arxiv.org/format/2411.03127">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Receiver-Centric Generative Semantic Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+X">Xunze Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+Y">Yifei Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zhaorui Wang</a>, <a href="/search/cs?searchtype=author&amp;query=You%2C+L">Lizhao You</a>, <a href="/search/cs?searchtype=author&amp;query=Pan%2C+H">Haoyuan Pan</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+F">Fangxin Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shuguang Cui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.03127v2-abstract-short" style="display: inline;"> This paper investigates semantic communications between a transmitter and a receiver, where original data, such as videos of interest to the receiver, is stored at the transmitter. Although significant process has been made in semantic communications, a fundamental design problem is that the semantic information is extracted based on certain criteria at the transmitter alone, without considering t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03127v2-abstract-full').style.display = 'inline'; document.getElementById('2411.03127v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.03127v2-abstract-full" style="display: none;"> This paper investigates semantic communications between a transmitter and a receiver, where original data, such as videos of interest to the receiver, is stored at the transmitter. Although significant process has been made in semantic communications, a fundamental design problem is that the semantic information is extracted based on certain criteria at the transmitter alone, without considering the receiver&#39;s specific information needs. As a result, critical information of primary concern to the receiver may be lost. In such cases, the semantic transmission becomes meaningless to the receiver, as all received information is irrelevant to its interests. To solve this problem, this paper presents a receiver-centric generative semantic communication system, where each transmission is initialized by the receiver. Specifically, the receiver first sends its request for the desired semantic information to the transmitter at the start of each transmission. Then, the transmitter extracts the required semantic information accordingly. 
A key challenge is how the transmitter understands the receiver&#39;s requests for semantic information and extracts the required semantic information in a reasonable and robust manner. We address this challenge by designing a well-structured framework and leveraging off-the-shelf generative AI products, such as GPT-4, along with several specialized tools for detection and estimation. Evaluation results demonstrate the feasibility and effectiveness of the proposed new semantic communication system. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03127v2-abstract-full').style.display = 'none'; document.getElementById('2411.03127v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Demo video has been made available at: https://goo.su/dUnAT</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.22771">arXiv:2410.22771</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.22771">pdf</a>, <a href="https://arxiv.org/format/2410.22771">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> FuseAnyPart: Diffusion-Driven Facial Parts Swapping via Multiple Reference Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yu%2C+Z">Zheng Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yaohua Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Siying Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+A">Aixi Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+W">Wei-Long Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Senzhang Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.22771v2-abstract-short" style="display: inline;"> Facial parts swapping aims to selectively transfer regions of interest from the source image onto the target image while maintaining the rest of the target image unchanged. 
Most studies on face swapping designed specifically for full-face swapping, are either unable or significantly limited when it comes to swapping individual facial parts, which hinders fine-grained and customized character desig&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22771v2-abstract-full').style.display = 'inline'; document.getElementById('2410.22771v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.22771v2-abstract-full" style="display: none;"> Facial parts swapping aims to selectively transfer regions of interest from the source image onto the target image while maintaining the rest of the target image unchanged. Most studies on face swapping designed specifically for full-face swapping, are either unable or significantly limited when it comes to swapping individual facial parts, which hinders fine-grained and customized character designs. However, designing such an approach specifically for facial parts swapping is challenged by a reasonable multiple reference feature fusion, which needs to be both efficient and effective. To overcome this challenge, FuseAnyPart is proposed to facilitate the seamless &#34;fuse-any-part&#34; customization of the face. In FuseAnyPart, facial parts from different people are assembled into a complete face in latent space within the Mask-based Fusion Module. Subsequently, the consolidated feature is dispatched to the Addition-based Injection Module for fusion within the UNet of the diffusion model to create novel characters. Extensive experiments qualitatively and quantitatively validate the superiority and robustness of FuseAnyPart. Source codes are available at https://github.com/Thomas-wyh/FuseAnyPart. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22771v2-abstract-full').style.display = 'none'; document.getElementById('2410.22771v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by the NeurIPS 2024 (Spotlight). 
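To make the "assemble parts in latent space" idea concrete, here is a minimal hypothetical sketch of mask-based latent fusion; the shapes, names, and blending rule are invented for illustration and this is not the released FuseAnyPart code:

```python
# Toy mask-based fusion of facial-part latents, in the spirit of the
# Mask-based Fusion Module described above; everything here is illustrative.
import numpy as np

def fuse_parts(latents: dict, masks: dict, base: np.ndarray) -> np.ndarray:
    """latents: part name -> (C, H, W) latent from a reference image
    masks:   part name -> (H, W) binary mask locating that part
    base:    (C, H, W) latent of the target face."""
    fused = base.copy()
    for part, latent in latents.items():
        m = masks[part][None, :, :]           # broadcast the mask over channels
        fused = (1 - m) * fused + m * latent  # paste the part region in latent space
    return fused

rng = np.random.default_rng(0)
base = rng.normal(size=(4, 64, 64))
eyes = rng.normal(size=(4, 64, 64))
mask = np.zeros((64, 64)); mask[20:30, 16:48] = 1.0
print(fuse_parts({"eyes": eyes}, {"eyes": mask}, base).shape)
```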
4. arXiv:2410.04761 [pdf, other]
Subjects: math.OC (Optimization and Control); cs.GT (Computer Science and Game Theory)
Shuffling Gradient Descent-Ascent with Variance Reduction for Nonconvex-Strongly Concave Smooth Minimax Problems
Authors: Xia Jiang, Linglingzhi Zhu, Anthony Man-Cho So, Shisheng Cui, Jian Sun
Abstract: In recent years, there has been considerable interest in designing stochastic first-order algorithms to tackle finite-sum smooth minimax problems. To obtain the gradient estimates, one typically relies on the uniform sampling-with-replacement scheme or various sampling-without-replacement (also known as shuffling) schemes. While the former is easier to analyze, the latter often have better empirical performance. In this paper, we propose a novel single-loop stochastic gradient descent-ascent (GDA) algorithm that employs both shuffling schemes and variance reduction to solve nonconvex-strongly concave smooth minimax problems. We show that the proposed algorithm achieves $\epsilon$-stationarity in expectation in $\mathcal{O}(\kappa^2 \epsilon^{-2})$ iterations, where $\kappa$ is the condition number of the problem. This outperforms existing shuffling schemes and matches the complexity of the best-known sampling-with-replacement algorithms. Our proposed algorithm also achieves the same complexity as that of its deterministic counterpart, the two-timescale GDA algorithm. Our numerical experiments demonstrate the superior performance of the proposed algorithm.
Submitted 7 October, 2024; originally announced October 2024.
5. arXiv:2410.03459 [pdf, other]
Subjects: cs.SD (Sound); cs.IT (Information Theory); cs.LG (Machine Learning); eess.AS (Audio and Speech Processing)
Generative Semantic Communication for Text-to-Speech Synthesis
Authors: Jiahao Zheng, Jinke Ren, Peng Xu, Zhihao Yuan, Jie Xu, Fangxin Wang, Gui Gui, Shuguang Cui
Abstract: Semantic communication is a promising technology to improve communication efficiency by transmitting only the semantic information of the source data. However, traditional semantic communication methods primarily focus on data reconstruction tasks, which may not be efficient for emerging generative tasks such as text-to-speech (TTS) synthesis. To address this limitation, this paper develops a novel generative semantic communication framework for TTS synthesis, leveraging generative artificial intelligence technologies. Firstly, we utilize a pre-trained large speech model called WavLM and the residual vector quantization method to construct two semantic knowledge bases (KBs) at the transmitter and receiver, respectively. The KB at the transmitter enables effective semantic extraction, while the KB at the receiver facilitates lifelike speech synthesis. Then, we employ a transformer encoder and a diffusion model to achieve efficient semantic coding without introducing significant communication overhead. Finally, numerical results demonstrate that our framework achieves much higher fidelity for the generated speech than four baselines, under both the additive white Gaussian noise channel and the Rayleigh fading channel.
Submitted 4 October, 2024; originally announced October 2024.
Comments: The paper has been accepted by the IEEE Globecom Workshop

6. arXiv:2410.02405 [pdf, other]
Subjects: cs.IT (Information Theory)
Cooperative Semantic Knowledge Base Update Policy for Multiple Semantic Communication Pairs
Authors: Shuling Li, Yaping Sun, Jinbei Zhang, Kechao Cai, Hao Chen, Shuguang Cui, Xiaodong Xu
Abstract: Semantic communication has emerged as a promising communication paradigm, and there has been extensive research focusing on its applications in the increasingly prevalent multi-user scenarios. However, the knowledge discrepancy among multiple users may lead to considerable disparities in their performance. To address this challenge, this paper proposes a novel multi-pair cooperative semantic knowledge base (SKB) update policy. Specifically, for each pair endowed with SKB-enabled semantic communication, its well-understood knowledge in the local SKB is selected and uploaded to the server to establish a global SKB, via a score-based knowledge selection scheme. The knowledge selection scheme achieves a balance between the uplink transmission overhead and the completeness of the global SKB. Then, with the assistance of the global SKB, each pair's local SKB is refined and its performance is improved. Numerical results show that the proposed cooperative SKB update policy obtains significant performance gains with minimal transmission overhead, especially for the initially poor-performing pairs.
Submitted 3 October, 2024; originally announced October 2024.
Comments: 6 pages, 5 figures
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.16774">arXiv:2409.16774</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.16774">pdf</a>, <a href="https://arxiv.org/format/2409.16774">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MixPolyp: Integrating Mask, Box and Scribble Supervision for Enhanced Polyp Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Hu%2C+Y">Yiwen Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Wei%2C+J">Jun Wei</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+Y">Yuncheng Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Haoyang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shuguang Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zhen Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+S">Song Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.16774v1-abstract-short" style="display: inline;"> Limited by the expensive labeling, polyp segmentation models are plagued by data shortages. To tackle this, we propose the mixed supervised polyp segmentation paradigm (MixPolyp). Unlike traditional models relying on a single type of annotation, MixPolyp combines diverse annotation types (mask, box, and scribble) within a single model, thereby expanding the range of available data and reducing lab&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.16774v1-abstract-full').style.display = 'inline'; document.getElementById('2409.16774v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.16774v1-abstract-full" style="display: none;"> Limited by the expensive labeling, polyp segmentation models are plagued by data shortages. To tackle this, we propose the mixed supervised polyp segmentation paradigm (MixPolyp). Unlike traditional models relying on a single type of annotation, MixPolyp combines diverse annotation types (mask, box, and scribble) within a single model, thereby expanding the range of available data and reducing labeling costs. To achieve this, MixPolyp introduces three novel supervision losses to handle various annotations: Subspace Projection loss (L_SP), Binary Minimum Entropy loss (L_BME), and Linear Regularization loss (L_LR). For box annotations, L_SP eliminates shape inconsistencies between the prediction and the supervision. For scribble annotations, L_BME provides supervision for unlabeled pixels through minimum entropy constraint, thereby alleviating supervision sparsity. Furthermore, L_LR provides dense supervision by enforcing consistency among the predictions, thus reducing the non-uniqueness. These losses are independent of the model structure, making them generally applicable. They are used only during training, adding no computational cost during inference. 
Extensive experiments on five datasets demonstrate MixPolyp&#39;s effectiveness. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.16774v1-abstract-full').style.display = 'none'; document.getElementById('2409.16774v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in IEEE BIBM 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.09780">arXiv:2409.09780</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.09780">pdf</a>, <a href="https://arxiv.org/ps/2409.09780">ps</a>, <a href="https://arxiv.org/format/2409.09780">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/LCOMM.2024.3462828">10.1109/LCOMM.2024.3462828 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Power Allocation for Finite-Blocklength IR-HARQ </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+W">Wenyu Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+M">Minhao Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+K">Kaiming Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zhaorui Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shuguang Cui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.09780v1-abstract-short" style="display: inline;"> This letter concerns the power allocation across the multiple transmission rounds under the Incremental Redundancy Hybrid Automatic Repeat reQuest (IR-HARQ) policy, in pursuit of an energy-efficient way of fulfilling the outage probability target in the finite-blocklength regime. We start by showing that the optimization objective and the constraints of the above power allocation problem all depen&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.09780v1-abstract-full').style.display = 'inline'; document.getElementById('2409.09780v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.09780v1-abstract-full" style="display: none;"> This letter concerns the power allocation across the multiple transmission rounds under the Incremental Redundancy Hybrid Automatic Repeat reQuest (IR-HARQ) policy, in pursuit of an energy-efficient way of fulfilling the outage probability target in the finite-blocklength regime. We start by showing that the optimization objective and the constraints of the above power allocation problem all depend upon the outage probability. 
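As a hedged illustration of the minimum-entropy idea behind L_BME (the actual loss in MixPolyp may differ in masking and weighting details), a binary entropy penalty over unlabeled pixels can be written in a few lines of PyTorch:

```python
# Sketch of a binary minimum-entropy term over unlabeled pixels; illustrative only.
import torch

def binary_min_entropy_loss(logits: torch.Tensor, unlabeled: torch.Tensor,
                            eps: float = 1e-6) -> torch.Tensor:
    """logits: (N, 1, H, W) raw predictions; unlabeled: (N, 1, H, W) mask that is 1
    where the scribble annotation gives no label. Minimizing the entropy pushes
    predictions at unlabeled pixels toward confident 0/1 values."""
    p = torch.sigmoid(logits).clamp(eps, 1 - eps)
    entropy = -(p * p.log() + (1 - p) * (1 - p).log())
    return (entropy * unlabeled).sum() / unlabeled.sum().clamp_min(1.0)

logits = torch.randn(2, 1, 8, 8, requires_grad=True)
mask = (torch.rand(2, 1, 8, 8) > 0.5).float()
loss = binary_min_entropy_loss(logits, mask)
loss.backward()
print(float(loss))
```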
8. arXiv:2409.09780 [pdf, ps, other]
Subjects: cs.IT (Information Theory)
DOI: 10.1109/LCOMM.2024.3462828
Power Allocation for Finite-Blocklength IR-HARQ
Authors: Wenyu Wang, Minhao Zhu, Kaiming Shen, Zhaorui Wang, Shuguang Cui
Abstract: This letter concerns the power allocation across the multiple transmission rounds under the Incremental Redundancy Hybrid Automatic Repeat reQuest (IR-HARQ) policy, in pursuit of an energy-efficient way of fulfilling the outage probability target in the finite-blocklength regime. We start by showing that the optimization objective and the constraints of the above power allocation problem all depend upon the outage probability. The main challenge then lies in the fact that the outage probability cannot be written analytically in terms of the power variables. To sidestep this difficulty, we propose a novel upper bound on the outage probability in the finite-blocklength regime, which is much tighter than the existing ones from the literature. Most importantly, by using this upper bound to approximate the outage probability, we can recast the original intractable power allocation problem into a geometric programming (GP) form, which can be efficiently solved by standard methods.
Submitted 15 September, 2024; originally announced September 2024.
Journal ref: IEEE Communications Letters 2024
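The closing sentence points to a geometric-programming reformulation. The snippet below is not the letter's formulation; it is a minimal cvxpy example of the GP pattern, with positive power variables, a posynomial objective, and an invented posynomial "outage proxy" constraint, showing how such a recast problem is solved with solve(gp=True):

```python
# Generic GP sketch (coefficients c1, c2 and the target are made-up numbers).
import cvxpy as cp

p1 = cp.Variable(pos=True)         # power in HARQ round 1
p2 = cp.Variable(pos=True)         # power in HARQ round 2
c1, c2, target = 0.5, 0.2, 0.05    # hypothetical constants

outage_proxy = c1 * p1**-1 + c2 * p1**-1 * p2**-1   # posynomial in (p1, p2)
problem = cp.Problem(cp.Minimize(p1 + p2),
                     [outage_proxy <= target, p1 >= 0.1, p2 >= 0.1])
problem.solve(gp=True)             # solved as a log-log convex program
print(problem.status, round(p1.value, 3), round(p2.value, 3))
```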
9. arXiv:2409.00103 [pdf, other]
Subjects: cs.CL (Computation and Language); cs.AI (Artificial Intelligence)
Nuance Matters: Probing Epistemic Consistency in Causal Reasoning
Authors: Shaobo Cui, Junyou Li, Luca Mouchel, Yiyang Feng, Boi Faltings
Abstract: Our study introduces the concept of causal epistemic consistency, which focuses on the self-consistency of Large Language Models (LLMs) in differentiating intermediates with nuanced differences in causal reasoning. We propose a suite of novel metrics (intensity ranking concordance, cross-group position agreement, and intra-group clustering) to evaluate LLMs on this front. Through extensive empirical studies on 21 high-profile LLMs, including GPT-4, Claude3, and LLaMA3-70B, we find evidence that current models struggle to maintain epistemic consistency in identifying the polarity and intensity of intermediates in causal reasoning. Additionally, we explore the potential of using internal token probabilities as an auxiliary tool to maintain causal epistemic consistency. In summary, our study bridges a critical gap in AI research by investigating the self-consistency over fine-grained intermediates involved in causal reasoning.
Submitted 27 August, 2024; originally announced September 2024.
Comments: 20 pages
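As a rough, hypothetical illustration of what an intensity-ranking concordance check can look like (the paper defines its own metrics, which this does not reproduce), one can compare a model's intensity ordering of intermediates with a reference ordering using Kendall's tau:

```python
# Rank-concordance sketch with invented intensity scores.
from scipy.stats import kendalltau

reference_intensity = [0.9, 0.7, 0.5, 0.3, 0.1]    # ground-truth ordering of intermediates
model_intensity     = [0.8, 0.75, 0.4, 0.35, 0.2]  # scores elicited from an LLM (hypothetical)

tau, p_value = kendalltau(reference_intensity, model_intensity)
print(f"Kendall tau = {tau:.2f} (1.0 would mean perfectly concordant rankings)")
```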
10. arXiv:2408.14122 [pdf, other]
Subjects: cs.CR (Cryptography and Security)
FG-SAT: Efficient Flow Graph for Encrypted Traffic Classification under Environment Shifts
Authors: Susu Cui, Xueying Han, Dongqi Han, Zhiliang Wang, Weihang Wang, Yun Li, Bo Jiang, Baoxu Liu, Zhigang Lu
Abstract: Encrypted traffic classification plays a critical role in network security and management. Currently, mining deep patterns from side-channel contents and plaintext fields through neural networks is a major solution. However, existing methods have two major limitations: (1) They fail to recognize the critical link between transport layer mechanisms and applications, missing the opportunity to learn internal structure features for accurate traffic classification. (2) They assume network traffic exists in an unrealistically stable and singular environment, making it difficult to effectively classify real-world traffic under environment shifts. In this paper, we propose FG-SAT, the first end-to-end method for encrypted traffic analysis under environment shifts. We propose a key abstraction, the Flow Graph, to represent flow internal relationship structures and rich node attributes, which enables robust and generalized representation. Additionally, to address the problem of inconsistent data distribution under environment shifts, we introduce a novel feature selection algorithm based on Jensen-Shannon divergence (JSD) to select robust node attributes. Finally, we design a classifier, GraphSAT, which integrates GraphSAGE and GAT to deeply learn Flow Graph features, enabling accurate encrypted traffic identification. FG-SAT exhibits both efficient and robust classification performance under environment shifts and outperforms state-of-the-art methods in encrypted attack detection and application classification.
Submitted 26 August, 2024; originally announced August 2024.
Comments: Ready to submit to IEEE Transactions on Information Forensics and Security (TIFS)
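To give a flavor of JSD-based attribute screening (this is not FG-SAT's actual algorithm; the attributes, data, and threshold below are invented), one could keep only node attributes whose empirical distributions remain close across two capture environments:

```python
# JSD-based screening sketch over two hypothetical capture environments.
import numpy as np
from scipy.spatial.distance import jensenshannon

def jsd(samples_a, samples_b, bins=20):
    """Jensen-Shannon divergence between two empirical distributions."""
    lo = min(samples_a.min(), samples_b.min())
    hi = max(samples_a.max(), samples_b.max())
    pa, _ = np.histogram(samples_a, bins=bins, range=(lo, hi), density=True)
    pb, _ = np.histogram(samples_b, bins=bins, range=(lo, hi), density=True)
    return jensenshannon(pa, pb) ** 2     # scipy returns the JS distance (sqrt of JSD)

rng = np.random.default_rng(0)
env_a = {"pkt_len": rng.normal(500, 80, 1000), "iat_ms": rng.exponential(3.0, 1000)}
env_b = {"pkt_len": rng.normal(510, 85, 1000), "iat_ms": rng.exponential(9.0, 1000)}

robust = [k for k in env_a if jsd(env_a[k], env_b[k]) < 0.1]
print("attributes kept:", robust)         # iat_ms shifts a lot and is likely dropped
```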
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Ready to submit to IEEE Transactions on Information Forensics and Security (TIFS)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.14051">arXiv:2408.14051</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.14051">pdf</a>, <a href="https://arxiv.org/format/2408.14051">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Let Video Teaches You More: Video-to-Image Knowledge Distillation using DEtection TRansformer for Medical Video Lesion Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+Y">Yuncheng Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zixun Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Wei%2C+J">Jun Wei</a>, <a href="/search/cs?searchtype=author&amp;query=Feng%2C+C">Chun-Mei Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+G">Guanbin Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wan%2C+X">Xiang Wan</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shuguang Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zhen Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.14051v1-abstract-short" style="display: inline;"> AI-assisted lesion detection models play a crucial role in the early screening of cancer. However, previous image-based models ignore the inter-frame contextual information present in videos. On the other hand, video-based models capture the inter-frame context but are computationally expensive. To mitigate this contradiction, we delve into Video-to-Image knowledge distillation leveraging DEtectio&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.14051v1-abstract-full').style.display = 'inline'; document.getElementById('2408.14051v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.14051v1-abstract-full" style="display: none;"> AI-assisted lesion detection models play a crucial role in the early screening of cancer. However, previous image-based models ignore the inter-frame contextual information present in videos. On the other hand, video-based models capture the inter-frame context but are computationally expensive. To mitigate this contradiction, we delve into Video-to-Image knowledge distillation leveraging DEtection TRansformer (V2I-DETR) for the task of medical video lesion detection. V2I-DETR adopts a teacher-student network paradigm. The teacher network aims at extracting temporal contexts from multiple frames and transferring them to the student network, and the student network is an image-based model dedicated to fast prediction in inference. By distilling multi-frame contexts into a single frame, the proposed V2I-DETR combines the advantages of utilizing temporal contexts from video-based models and the inference speed of image-based models. 
Through extensive experiments, V2I-DETR outperforms previous state-of-the-art methods by a large margin while achieving the real-time inference speed (30 FPS) as the image-based model. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.14051v1-abstract-full').style.display = 'none'; document.getElementById('2408.14051v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">BIBM2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.10067">arXiv:2408.10067</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.10067">pdf</a>, <a href="https://arxiv.org/format/2408.10067">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Towards a Benchmark for Colorectal Cancer Segmentation in Endorectal Ultrasound Videos: Dataset and Model Development </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+Y">Yuncheng Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+Y">Yiwen Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zixun Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Wei%2C+J">Jun Wei</a>, <a href="/search/cs?searchtype=author&amp;query=Feng%2C+C">Chun-Mei Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Tang%2C+X">Xuemei Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Wan%2C+X">Xiang Wan</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Yong Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shuguang Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zhen Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.10067v1-abstract-short" style="display: inline;"> Endorectal ultrasound (ERUS) is an important imaging modality that provides high reliability for diagnosing the depth and boundary of invasion in colorectal cancer. However, the lack of a large-scale ERUS dataset with high-quality annotations hinders the development of automatic ultrasound diagnostics. In this paper, we collected and annotated the first benchmark dataset that covers diverse ERUS s&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.10067v1-abstract-full').style.display = 'inline'; document.getElementById('2408.10067v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.10067v1-abstract-full" style="display: none;"> Endorectal ultrasound (ERUS) is an important imaging modality that provides high reliability for diagnosing the depth and boundary of invasion in colorectal cancer. 
However, the lack of a large-scale ERUS dataset with high-quality annotations hinders the development of automatic ultrasound diagnostics. In this paper, we collected and annotated the first benchmark dataset that covers diverse ERUS scenarios, i.e. colorectal cancer segmentation, detection, and infiltration depth staging. Our ERUS-10K dataset comprises 77 videos and 10,000 high-resolution annotated frames. Based on this dataset, we further introduce a benchmark model for colorectal cancer segmentation, named the Adaptive Sparse-context TRansformer (ASTR). ASTR is designed based on three considerations: scanning mode discrepancy, temporal information, and low computational complexity. For generalizing to different scanning modes, the adaptive scanning-mode augmentation is proposed to convert between raw sector images and linear scan ones. For mining temporal information, the sparse-context transformer is incorporated to integrate inter-frame local and global features. For reducing computational complexity, the sparse-context block is introduced to extract contextual features from auxiliary frames. Finally, on the benchmark dataset, the proposed ASTR model achieves a 77.6% Dice score in rectal cancer segmentation, largely outperforming previous state-of-the-art methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.10067v1-abstract-full').style.display = 'none'; document.getElementById('2408.10067v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.08602">arXiv:2408.08602</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.08602">pdf</a>, <a href="https://arxiv.org/format/2408.08602">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Discrete-time SIS Social Contagion Processes on Hypergraphs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liang%2C+L">Lidan Liang</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shaoxuan Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+F">Fangzhou Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.08602v1-abstract-short" style="display: inline;"> Recent research on social contagion processes has revealed the limitations of traditional networks, which capture only pairwise relationships, to characterize complex multiparty relationships and group influences properly. Social contagion processes on higher-order networks (simplicial complexes and general hypergraphs) have therefore emerged as a novel frontier. 
In this work, we investigate discr&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08602v1-abstract-full').style.display = 'inline'; document.getElementById('2408.08602v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.08602v1-abstract-full" style="display: none;"> Recent research on social contagion processes has revealed the limitations of traditional networks, which capture only pairwise relationships, to characterize complex multiparty relationships and group influences properly. Social contagion processes on higher-order networks (simplicial complexes and general hypergraphs) have therefore emerged as a novel frontier. In this work, we investigate discrete-time Susceptible-Infected-Susceptible (SIS) social contagion processes occurring on weighted and directed hypergraphs and their extensions to bivirus cases and general higher-order SIS processes with the aid of tensor algebra. Our focus lies in comprehensively characterizing the healthy state and endemic equilibria within this framework. The emergence of bistability or multistability, where multiple equilibria coexist and are simultaneously locally asymptotically stable, is demonstrated as a consequence of the higher-order interactions. Novel sufficient conditions for the appearance of these system behaviors, determined by both the (higher-order) network topology and the transition rates, are provided to assess the likelihood of the SIS social contagion processes causing an outbreak. More importantly, given that the equilibrium is locally stable, an explicit domain of attraction associated with the system parameters is constructed. Moreover, a learning method to estimate the transition rates is presented. In the end, the attained theoretical results are supplemented via numerical examples. Specifically, we evaluate the effectiveness of the networked SIS social contagion process by comparing it with the $2^n$-state Markov chain model. These numerical examples are given to highlight the performance of parameter learning algorithms and the system behaviors of the discrete-time SIS social contagion process. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08602v1-abstract-full').style.display = 'none'; document.getElementById('2408.08602v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024.
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.06945">arXiv:2408.06945</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.06945">pdf</a>, <a href="https://arxiv.org/ps/2408.06945">ps</a>, <a href="https://arxiv.org/format/2408.06945">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Heavy-Ball Momentum Accelerated Actor-Critic With Function Approximation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dong%2C+Y">Yanjie Dong</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+H">Haijun Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+G">Gang Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shisheng Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+X">Xiping Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.06945v2-abstract-short" style="display: inline;"> By using a parametric value function to replace the Monte-Carlo rollouts for value estimation, the actor-critic (AC) algorithms can reduce the variance of stochastic policy gradient so as to improve the convergence rate. While existing works mainly focus on analyzing the convergence rate of AC algorithms under Markovian noise, the impacts of momentum on AC algorithms remain largely unexplored. In t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.06945v2-abstract-full').style.display = 'inline'; document.getElementById('2408.06945v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.06945v2-abstract-full" style="display: none;"> By using a parametric value function to replace the Monte-Carlo rollouts for value estimation, the actor-critic (AC) algorithms can reduce the variance of stochastic policy gradient so as to improve the convergence rate. While existing works mainly focus on analyzing the convergence rate of AC algorithms under Markovian noise, the impacts of momentum on AC algorithms remain largely unexplored. In this work, we first propose a heavy-ball momentum based advantage actor-critic (\mbox{HB-A2C}) algorithm by integrating the heavy-ball momentum into the critic recursion that is parameterized by a linear function. When the sample trajectory follows a Markov decision process, we quantitatively certify the acceleration capability of the proposed HB-A2C algorithm. Our theoretical results demonstrate that the proposed HB-A2C finds an $\epsilon$-approximate stationary point with $\mathcal{O}(\epsilon^{-2})$ iterations for reinforcement learning tasks with Markovian noise. Moreover, we also reveal the dependence of learning rates on the length of the sample trajectory. By carefully selecting the momentum factor of the critic recursion, the proposed HB-A2C can balance the errors introduced by the initialization and the stochastic approximation.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.06945v2-abstract-full').style.display = 'none'; document.getElementById('2408.06945v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.06828">arXiv:2408.06828</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.06828">pdf</a>, <a href="https://arxiv.org/format/2408.06828">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Photometric Inverse Rendering: Shading Cues Modeling and Surface Reflectance Regularization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Bao%2C+J">Jingzhi Bao</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+G">Guanying Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shuguang Cui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.06828v1-abstract-short" style="display: inline;"> This paper addresses the problem of inverse rendering from photometric images. Existing approaches for this problem suffer from the effects of self-shadows, inter-reflections, and lack of constraints on the surface reflectance, leading to inaccurate decomposition of reflectance and illumination due to the ill-posed nature of inverse rendering. In this work, we propose a new method for neural inver&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.06828v1-abstract-full').style.display = 'inline'; document.getElementById('2408.06828v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.06828v1-abstract-full" style="display: none;"> This paper addresses the problem of inverse rendering from photometric images. Existing approaches for this problem suffer from the effects of self-shadows, inter-reflections, and lack of constraints on the surface reflectance, leading to inaccurate decomposition of reflectance and illumination due to the ill-posed nature of inverse rendering. In this work, we propose a new method for neural inverse rendering. Our method jointly optimizes the light source position to account for the self-shadows in images, and computes indirect illumination using a differentiable rendering layer and an importance sampling strategy. To enhance surface reflectance decomposition, we introduce a new regularization by distilling DINO features to foster accurate and consistent material decomposition. Extensive experiments on synthetic and real datasets demonstrate that our method outperforms the state-of-the-art methods in reflectance decomposition. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.06828v1-abstract-full').style.display = 'none'; document.getElementById('2408.06828v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project page: https://jzbao03.site/projects/PIR/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.03675">arXiv:2408.03675</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.03675">pdf</a>, <a href="https://arxiv.org/format/2408.03675">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> NACL: A General and Effective KV Cache Eviction Framework for LLMs at Inference Time </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Y">Yilong Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+G">Guoxia Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Shang%2C+J">Junyuan Shang</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shiyao Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zhenyu Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+T">Tingwen Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Shuohuan Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+Y">Yu Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+D">Dianhai Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+H">Hua Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.03675v2-abstract-short" style="display: inline;"> Large Language Models (LLMs) have ignited an innovative surge of AI applications, marking a new era of exciting possibilities equipped with extended context windows. However, hosting these models is cost-prohibitive mainly due to the extensive memory consumption of KV Cache involving long-context modeling. Despite several works proposing to evict unnecessary tokens from the KV Cache, most of them&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.03675v2-abstract-full').style.display = 'inline'; document.getElementById('2408.03675v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.03675v2-abstract-full" style="display: none;"> Large Language Models (LLMs) have ignited an innovative surge of AI applications, marking a new era of exciting possibilities equipped with extended context windows. However, hosting these models is cost-prohibitive mainly due to the extensive memory consumption of KV Cache involving long-context modeling. 
Despite several works proposing to evict unnecessary tokens from the KV Cache, most of them rely on the biased local statistics of accumulated attention scores and report performance using unconvincing metrics like perplexity on inadequate short-text evaluations. In this paper, we propose NACL, a general framework for long-context KV cache eviction that achieves more effective and efficient eviction in a single operation during the encoding phase. Due to NACL&#39;s efficiency, we combine more accurate attention score statistics in PROXY TOKENS EVICTION with the diversified random eviction strategy of RANDOM EVICTION, aiming to alleviate the issue of attention bias and enhance the robustness in maintaining pivotal tokens for long-context modeling tasks. Notably, our method significantly improves the performance on short- and long-text tasks by 80% and 76% respectively, reducing KV Cache by up to 50% with over 95% performance maintenance. The code is available at https://github.com/PaddlePaddle/Research/tree/master/NLP/ACL2024-NACL. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.03675v2-abstract-full').style.display = 'none'; document.getElementById('2408.03675v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ACL 2024 (main conference, long paper)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.03618">arXiv:2408.03618</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.03618">pdf</a>, <a href="https://arxiv.org/format/2408.03618">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> A Logical Fallacy-Informed Framework for Argument Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Mouchel%2C+L">Luca Mouchel</a>, <a href="/search/cs?searchtype=author&amp;query=Paul%2C+D">Debjit Paul</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shaobo Cui</a>, <a href="/search/cs?searchtype=author&amp;query=West%2C+R">Robert West</a>, <a href="/search/cs?searchtype=author&amp;query=Bosselut%2C+A">Antoine Bosselut</a>, <a href="/search/cs?searchtype=author&amp;query=Faltings%2C+B">Boi Faltings</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.03618v2-abstract-short" style="display: inline;"> Despite the remarkable performance of Large Language Models (LLMs) in natural language processing tasks, they still struggle with generating logically sound arguments, resulting in potential risks such as spreading
misinformation. To address this issue, we introduce FIPO, a fallacy-informed framework that leverages preference optimization methods to steer LLMs toward logically sound arguments. FIP&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.03618v2-abstract-full').style.display = 'inline'; document.getElementById('2408.03618v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.03618v2-abstract-full" style="display: none;"> Despite the remarkable performance of Large Language Models (LLMs) in natural language processing tasks, they still struggle with generating logically sound arguments, resulting in potential risks such as spreading misinformation. To address this issue, we introduce FIPO, a fallacy-informed framework that leverages preference optimization methods to steer LLMs toward logically sound arguments. FIPO includes a classification loss, to capture the fine-grained information on fallacy types. Our results on argumentation datasets show that our method reduces the fallacy errors by up to 17.5%. Furthermore, our human evaluation results indicate that the quality of the generated arguments by our method significantly outperforms the fine-tuned baselines, as well as other preference optimization methods, such as DPO. These findings highlight the importance of ensuring models are aware of logical fallacies for effective argument generation. Our code is available at github.com/lucamouchel/Logical-Fallacies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.03618v2-abstract-full').style.display = 'none'; document.getElementById('2408.03618v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.02047">arXiv:2408.02047</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.02047">pdf</a>, <a href="https://arxiv.org/format/2408.02047">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Latency-Aware Resource Allocation for Mobile Edge Generation and Computing via Deep Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Y">Yinyu Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xuhui Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Ren%2C+J">Jinke Ren</a>, <a href="/search/cs?searchtype=author&amp;query=Xing%2C+H">Huijun Xing</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+Y">Yanyan Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shuguang Cui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.02047v2-abstract-short" style="display: inline;"> Recently, the integration of mobile edge computing (MEC) and generative artificial intelligence (GAI) technology has given rise to a new area called mobile edge generation and computing (MEGC), which offers mobile users heterogeneous services such as task computing and content generation. In this letter, we investigate the joint communication, computation, and the AIGC resource allocation problem&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.02047v2-abstract-full').style.display = 'inline'; document.getElementById('2408.02047v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.02047v2-abstract-full" style="display: none;"> Recently, the integration of mobile edge computing (MEC) and generative artificial intelligence (GAI) technology has given rise to a new area called mobile edge generation and computing (MEGC), which offers mobile users heterogeneous services such as task computing and content generation. In this letter, we investigate the joint communication, computation, and the AIGC resource allocation problem in an MEGC system. A latency minimization problem is first formulated to enhance the quality of service for mobile users. Due to the strong coupling of the optimization variables, we propose a new deep reinforcement learning-based algorithm to solve it efficiently. Numerical results demonstrate that the proposed algorithm can achieve lower latency than two baseline algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.02047v2-abstract-full').style.display = 'none'; document.getElementById('2408.02047v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 6 figures. This paper has been accepted for publication by IEEE Networking Letters</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.18614">arXiv:2407.18614</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.18614">pdf</a>, <a href="https://arxiv.org/format/2407.18614">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> </div> <p class="title is-5 mathjax"> LookupForensics: A Large-Scale Multi-Task Dataset for Multi-Phase Image-Based Fact Verification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shuhan Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+H+H">Huy H. Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Le%2C+T">Trung-Nghia Le</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+C">Chun-Shien Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Echizen%2C+I">Isao Echizen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.18614v1-abstract-short" style="display: inline;"> Amid the proliferation of forged images, notably the tsunami of deepfake content, extensive research has been conducted on using artificial intelligence (AI) to identify forged content in the face of continuing advancements in counterfeiting technologies. We have investigated the use of AI to provide the original authentic image after deepfake detection, which we believe is a reliable and persuasi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.18614v1-abstract-full').style.display = 'inline'; document.getElementById('2407.18614v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.18614v1-abstract-full" style="display: none;"> Amid the proliferation of forged images, notably the tsunami of deepfake content, extensive research has been conducted on using artificial intelligence (AI) to identify forged content in the face of continuing advancements in counterfeiting technologies. We have investigated the use of AI to provide the original authentic image after deepfake detection, which we believe is a reliable and persuasive solution. We call this &#34;image-based automated fact verification,&#34; a name that originated from a text-based fact-checking system used by journalists. We have developed a two-phase open framework that integrates detection and retrieval components. Additionally, inspired by a dataset proposed by Meta Fundamental AI Research, we further constructed a large-scale dataset that is specifically designed for this task. This dataset simulates real-world conditions and includes both content-preserving and content-aware manipulations that present a range of difficulty levels and have potential for ongoing research. 
This multi-task dataset is fully annotated, enabling it to be utilized for sub-tasks within the forgery identification and fact retrieval domains. This paper makes two main contributions: (1) We introduce a new task, &#34;image-based automated fact verification,&#34; and present a novel two-phase open framework combining &#34;forgery identification&#34; and &#34;fact retrieval.&#34; (2) We present a large-scale dataset tailored for this new task that features various hand-crafted image edits and machine learning-driven manipulations, with extensive annotations suitable for various sub-tasks. Extensive experimental results validate its practicality for fact verification research and clarify its difficulty levels for various sub-tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.18614v1-abstract-full').style.display = 'none'; document.getElementById('2407.18614v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Pages 1-13 are the main body of the paper, and pages 14-16 are the supplementary material</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.16260">arXiv:2407.16260</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.16260">pdf</a>, <a href="https://arxiv.org/format/2407.16260">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DreamDissector: Learning Disentangled Text-to-3D Generation from 2D Diffusion Priors </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yan%2C+Z">Zizheng Yan</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+J">Jiapeng Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Meng%2C+F">Fanpeng Meng</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Y">Yushuang Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Qiu%2C+L">Lingteng Qiu</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+Z">Zisheng Ye</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shuguang Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+G">Guanying Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+X">Xiaoguang Han</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.16260v1-abstract-short" style="display: inline;"> Text-to-3D generation has recently seen significant progress. To enhance its practicality in real-world applications, it is crucial to generate multiple independent objects with interactions, similar to layer-compositing in 2D image editing. 
However, existing text-to-3D methods struggle with this task, as they are designed to generate either non-independent objects or independent objects lacking s&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.16260v1-abstract-full').style.display = 'inline'; document.getElementById('2407.16260v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.16260v1-abstract-full" style="display: none;"> Text-to-3D generation has recently seen significant progress. To enhance its practicality in real-world applications, it is crucial to generate multiple independent objects with interactions, similar to layer-compositing in 2D image editing. However, existing text-to-3D methods struggle with this task, as they are designed to generate either non-independent objects or independent objects lacking spatially plausible interactions. Addressing this, we propose DreamDissector, a text-to-3D method capable of generating multiple independent objects with interactions. DreamDissector accepts a multi-object text-to-3D NeRF as input and produces independent textured meshes. To achieve this, we introduce the Neural Category Field (NeCF) for disentangling the input NeRF. Additionally, we present the Category Score Distillation Sampling (CSDS), facilitated by a Deep Concept Mining (DCM) module, to tackle the concept gap issue in diffusion models. By leveraging NeCF and CSDS, we can effectively derive sub-NeRFs from the original scene. Further refinement enhances geometry and texture. Our experimental results validate the effectiveness of DreamDissector, providing users with novel means to control 3D synthesis at the object level and potentially opening avenues for various creative applications in the future. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.16260v1-abstract-full').style.display = 'none'; document.getElementById('2407.16260v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ECCV 2024. 
Project page: https://chester256.github.io/dreamdissector</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.02855">arXiv:2407.02855</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.02855">pdf</a>, <a href="https://arxiv.org/format/2407.02855">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Safe Unlearning: A Surprisingly Effective and Generalizable Solution to Defend Against Jailbreak Attacks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zhexin Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Junxiao Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Ke%2C+P">Pei Ke</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shiyao Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+C">Chujie Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">Hongning Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+M">Minlie Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.02855v2-abstract-short" style="display: inline;"> LLMs are known to be vulnerable to jailbreak attacks, even after safety alignment. An important observation is that, while different types of jailbreak attacks can generate significantly different queries, they mostly result in similar responses that are rooted in the same harmful knowledge (e.g., detailed steps to make a bomb). Therefore, we conjecture that directly unlearning the harmful knowledge&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02855v2-abstract-full').style.display = 'inline'; document.getElementById('2407.02855v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.02855v2-abstract-full" style="display: none;"> LLMs are known to be vulnerable to jailbreak attacks, even after safety alignment. An important observation is that, while different types of jailbreak attacks can generate significantly different queries, they mostly result in similar responses that are rooted in the same harmful knowledge (e.g., detailed steps to make a bomb). Therefore, we conjecture that directly unlearning the harmful knowledge in the LLM can be a more effective way to defend against jailbreak attacks than the mainstream supervised fine-tuning (SFT) approaches. Our extensive experiments demonstrate the surprising generalizability of our unlearning-based approach: using only 20 raw harmful questions without any jailbreak prompt during training, our solution reduced the Attack Success Rate (ASR) in Vicuna-7B from 82.6% to 7.7% on out-of-distribution (OOD) harmful questions wrapped with various complex jailbreak prompts.
This significantly outperforms Llama2-7B-Chat, which is fine-tuned on about 0.1M safety alignment samples but still has an ASR of 21.9% even with the help of an additional safety system prompt. Further analysis reveals that the generalization ability of our solution may stem from the intrinsic relatedness among harmful responses across harmful questions (e.g., response patterns, shared steps and actions in response, and similarity among their learned representations in the LLM). Our code is available at \url{https://github.com/thu-coai/SafeUnlearning}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02855v2-abstract-full').style.display = 'none'; document.getElementById('2407.02855v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.00286">arXiv:2407.00286</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.00286">pdf</a>, <a href="https://arxiv.org/format/2407.00286">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Digital Twin-Assisted Data-Driven Optimization for Reliable Edge Caching in Wireless Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zifan Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Yuchen Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Peng%2C+Z">Zhiyuan Peng</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+M">Mingzhe Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+D">Dongkuan Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shuguang Cui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.00286v1-abstract-short" style="display: inline;"> Optimizing edge caching is crucial for the advancement of next-generation (nextG) wireless networks, ensuring high-speed and low-latency services for mobile users.
Existing data-driven optimization approaches often lack awareness of the distribution of random data variables and focus solely on optimizing cache hit rates, neglecting potential reliability concerns, such as base station overload and&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00286v1-abstract-full').style.display = 'inline'; document.getElementById('2407.00286v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.00286v1-abstract-full" style="display: none;"> Optimizing edge caching is crucial for the advancement of next-generation (nextG) wireless networks, ensuring high-speed and low-latency services for mobile users. Existing data-driven optimization approaches often lack awareness of the distribution of random data variables and focus solely on optimizing cache hit rates, neglecting potential reliability concerns, such as base station overload and unbalanced cache issues. This oversight can result in system crashes and degraded user experience. To bridge this gap, we introduce a novel digital twin-assisted optimization framework, called D-REC, which integrates reinforcement learning (RL) with diverse intervention modules to ensure reliable caching in nextG wireless networks. We first develop a joint vertical and horizontal twinning approach to efficiently create network digital twins, which are then employed by D-REC as RL optimizers and safeguards, providing ample datasets for training and predictive evaluation of our cache replacement policy. By incorporating reliability modules into a constrained Markov decision process, D-REC can adaptively adjust actions, rewards, and states to comply with advantageous constraints, minimizing the risk of network failures. Theoretical analysis demonstrates comparable convergence rates between D-REC and vanilla data-driven methods without compromising caching performance. Extensive experiments validate that D-REC outperforms conventional approaches in cache hit rate and load balancing while effectively enforcing predetermined reliability intervention modules. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00286v1-abstract-full').style.display = 'none'; document.getElementById('2407.00286v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by IEEE Journal on Selected Areas in Communications (JSAC)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.00047">arXiv:2407.00047</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.00047">pdf</a>, <a href="https://arxiv.org/format/2407.00047">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> One Queue Is All You Need: Resolving Head-of-Line Blocking in Large Language Model Serving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Patke%2C+A">Archit Patke</a>, <a href="/search/cs?searchtype=author&amp;query=Reddy%2C+D">Dhemath Reddy</a>, <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Saurabh Jha</a>, <a href="/search/cs?searchtype=author&amp;query=Qiu%2C+H">Haoran Qiu</a>, <a href="/search/cs?searchtype=author&amp;query=Pinto%2C+C">Christian Pinto</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shengkun Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Narayanaswami%2C+C">Chandra Narayanaswami</a>, <a href="/search/cs?searchtype=author&amp;query=Kalbarczyk%2C+Z">Zbigniew Kalbarczyk</a>, <a href="/search/cs?searchtype=author&amp;query=Iyer%2C+R">Ravishankar Iyer</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.00047v1-abstract-short" style="display: inline;"> $ $Large language models (LLMs) have become an increasingly important workload for cloud providers catering to both enterprise and consumer applications. LLM inference requests from these applications have end-to-end latency SLOs that must be adhered to in production settings. However, existing LLM serving systems focus on optimization objectives such as request serving throughput or request execu&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00047v1-abstract-full').style.display = 'inline'; document.getElementById('2407.00047v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.00047v1-abstract-full" style="display: none;"> $ $Large language models (LLMs) have become an increasingly important workload for cloud providers catering to both enterprise and consumer applications. LLM inference requests from these applications have end-to-end latency SLOs that must be adhered to in production settings. However, existing LLM serving systems focus on optimization objectives such as request serving throughput or request execution latency rather than the end-to-end latency SLOs. Achieving end-to-end SLOs for latency-sensitive requests is challenging due to head-of-line (HOL) blocking in the request queue, which results from bursty arrival rates and insufficient resources. 
To address the above challenge, we propose QLM, a multi-model queue management framework for LLM serving. QLM uses stochastic programming to orchestrate the actions of multiple LLM Serving Operations (LSOs) to reduce HOL blocking and maximize SLO attainment. Specifically, QLM uses the following LSOs: model swapping, request eviction, GPU-CPU state swapping, load balancing, and warm model start. Evaluation on heterogeneous GPU devices and models with a real-world LLM serving dataset shows that QLM improves SLO attainment by 40-90% and throughput by 20-400% while maintaining or improving device utilization compared to other state-of-the-art LLM serving systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00047v1-abstract-full').style.display = 'none'; document.getElementById('2407.00047v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.19307">arXiv:2406.19307</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.19307">pdf</a>, <a href="https://arxiv.org/format/2406.19307">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> The Odyssey of Commonsense Causality: From Foundational Benchmarks to Cutting-Edge Reasoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shaobo Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+Z">Zhijing Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Sch%C3%B6lkopf%2C+B">Bernhard Schölkopf</a>, <a href="/search/cs?searchtype=author&amp;query=Faltings%2C+B">Boi Faltings</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.19307v2-abstract-short" style="display: inline;"> Understanding commonsense causality is a unique mark of intelligence for humans. It helps people understand the principles of the real world better and benefits the decision-making process related to causation. For instance, commonsense causality is crucial in judging whether a defendant&#39;s action causes the plaintiff&#39;s loss in determining legal liability. Despite its significance, a systematic exp&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19307v2-abstract-full').style.display = 'inline'; document.getElementById('2406.19307v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.19307v2-abstract-full" style="display: none;"> Understanding commonsense causality is a unique mark of intelligence for humans. It helps people understand the principles of the real world better and benefits the decision-making process related to causation. For instance, commonsense causality is crucial in judging whether a defendant&#39;s action causes the plaintiff&#39;s loss in determining legal liability. Despite its significance, a systematic exploration of this topic is notably lacking.
Our comprehensive survey bridges this gap by focusing on taxonomies, benchmarks, acquisition methods, qualitative reasoning, and quantitative measurements in commonsense causality, synthesizing insights from over 200 representative articles. Our work aims to provide a systematic overview, update scholars on recent advancements, provide a pragmatic guide for beginners, and highlight promising future research directions in this vital field. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19307v2-abstract-full').style.display = 'none'; document.getElementById('2406.19307v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">42 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.16072">arXiv:2406.16072</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.16072">pdf</a>, <a href="https://arxiv.org/format/2406.16072">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DV-3DLane: End-to-end Multi-modal 3D Lane Detection with Dual-view Representation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Luo%2C+Y">Yueru Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shuguang Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zhen Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.16072v1-abstract-short" style="display: inline;"> Accurate 3D lane estimation is crucial for ensuring safety in autonomous driving. However, prevailing monocular techniques suffer from depth loss and lighting variations, hampering accurate 3D lane detection. In contrast, LiDAR points offer geometric cues and enable precise localization. In this paper, we present DV-3DLane, a novel end-to-end Dual-View multi-modal 3D Lane detection framework that&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.16072v1-abstract-full').style.display = 'inline'; document.getElementById('2406.16072v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.16072v1-abstract-full" style="display: none;"> Accurate 3D lane estimation is crucial for ensuring safety in autonomous driving. However, prevailing monocular techniques suffer from depth loss and lighting variations, hampering accurate 3D lane detection. In contrast, LiDAR points offer geometric cues and enable precise localization. In this paper, we present DV-3DLane, a novel end-to-end Dual-View multi-modal 3D Lane detection framework that synergizes the strengths of both images and LiDAR points. 
We propose to learn multi-modal features in dual-view spaces, i.e., perspective view (PV) and bird&#39;s-eye-view (BEV), effectively leveraging the modal-specific information. To achieve this, we introduce three designs: 1) A bidirectional feature fusion strategy that integrates multi-modal features into each view space, exploiting their unique strengths. 2) A unified query generation approach that leverages lane-aware knowledge from both PV and BEV spaces to generate queries. 3) A 3D dual-view deformable attention mechanism, which aggregates discriminative features from both PV and BEV spaces into queries for accurate 3D lane detection. Extensive experiments on the public benchmark, OpenLane, demonstrate the efficacy and efficiency of DV-3DLane. It achieves state-of-the-art performance, with a remarkable 11.2 gain in F1 score and a substantial 53.5% reduction in errors. The code is available at \url{https://github.com/JMoonr/dv-3dlane}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.16072v1-abstract-full').style.display = 'none'; document.getElementById('2406.16072v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at ICLR2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.07389">arXiv:2406.07389</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.07389">pdf</a>, <a href="https://arxiv.org/ps/2406.07389">ps</a>, <a href="https://arxiv.org/format/2406.07389">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Robust Image Semantic Coding with Learnable CSI Fusion Masking over MIMO Fading Channels </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xie%2C+B">Bingyan Xie</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Y">Yongpeng Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+Y">Yuxuan Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+W">Wenjun Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shuguang Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Debbah%2C+M">Merouane Debbah</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.07389v1-abstract-short" style="display: inline;"> Though achieving marvelous progress in various scenarios, existing semantic communication frameworks mainly consider single-input single-output Gaussian channels or Rayleigh fading channels, neglecting the widely-used multiple-input multiple-output (MIMO) channels, which hinders the application into practical systems. 
One common solution to combat MIMO fading is to utilize feedback MIMO channel st&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.07389v1-abstract-full').style.display = 'inline'; document.getElementById('2406.07389v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.07389v1-abstract-full" style="display: none;"> Though achieving marvelous progress in various scenarios, existing semantic communication frameworks mainly consider single-input single-output Gaussian channels or Rayleigh fading channels, neglecting the widely-used multiple-input multiple-output (MIMO) channels, which hinders the application into practical systems. One common solution to combat MIMO fading is to utilize feedback MIMO channel state information (CSI). In this paper, we incorporate MIMO CSI into system designs from a new perspective and propose the learnable CSI fusion semantic communication (LCFSC) framework, where CSI is treated as side information by the semantic extractor to enhance the semantic coding. To avoid feature fusion due to abrupt combination of CSI with features, we present a non-invasive CSI fusion multi-head attention module inside the Swin Transformer. With the learned attention masking map determined by both source and channel states, more robust attention distribution could be generated. Furthermore, the percentage of mask elements could be flexibly adjusted by the learnable mask ratio, which is produced based on conditional variational inference in an unsupervised manner. In this way, CSI-aware semantic coding is achieved through learnable CSI fusion masking. Experimental results testify to the superiority of LCFSC over traditional schemes and state-of-the-art Swin Transformer-based semantic communication frameworks in MIMO fading channels. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.07389v1-abstract-full').style.display = 'none'; document.getElementById('2406.07389v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been accepted by IEEE Transactions on Wireless Communications</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.03933">arXiv:2406.03933</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.03933">pdf</a>, <a href="https://arxiv.org/format/2406.03933">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Beyond Similarity: Personalized Federated Recommendation with Composite Aggregation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+H">Honglei Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Haoxuan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Jundong Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Sen Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Yan%2C+K">Kunda Yan</a>, <a href="/search/cs?searchtype=author&amp;query=Wuerkaixi%2C+A">Abudukelimu Wuerkaixi</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xin Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+Z">Zhiqi Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yidong Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.03933v1-abstract-short" style="display: inline;"> Federated recommendation aims to collect global knowledge by aggregating local models from massive devices, to provide recommendations while ensuring privacy. Current methods mainly leverage aggregation functions invented by the federated vision community to aggregate parameters from similar clients, e.g., clustering aggregation. Despite considerable performance, we argue that it is suboptimal to appl&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.03933v1-abstract-full').style.display = 'inline'; document.getElementById('2406.03933v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.03933v1-abstract-full" style="display: none;"> Federated recommendation aims to collect global knowledge by aggregating local models from massive devices, to provide recommendations while ensuring privacy. Current methods mainly leverage aggregation functions invented by the federated vision community to aggregate parameters from similar clients, e.g., clustering aggregation. Despite considerable performance, we argue that it is suboptimal to apply them to federated recommendation directly. This is mainly reflected in the disparate model architectures. Different from structured parameters like convolutional neural networks in federated vision, federated recommender models usually distinguish themselves by employing a one-to-one item embedding table. 
Such a discrepancy induces the challenging embedding skew issue, which continually updates the trained embeddings but ignores the non-trained ones during aggregation, thus failing to predict future items accurately. To this end, we propose a personalized Federated recommendation model with Composite Aggregation (FedCA), which not only aggregates similar clients to enhance trained embeddings, but also aggregates complementary clients to update non-trained embeddings. Besides, we formulate the overall learning process into a unified optimization algorithm to jointly learn the similarity and complementarity. Extensive experiments on several real-world datasets substantiate the effectiveness of our proposed model. The source codes are available at https://github.com/hongleizhang/FedCA. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.03933v1-abstract-full').style.display = 'none'; document.getElementById('2406.03933v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.19682">arXiv:2405.19682</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.19682">pdf</a>, <a href="https://arxiv.org/format/2405.19682">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Fully Test-Time Adaptation for Monocular 3D Object Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lin%2C+H">Hongbin Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yifan Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Niu%2C+S">Shuaicheng Niu</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shuguang Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zhen Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.19682v1-abstract-short" style="display: inline;"> Monocular 3D object detection (Mono 3Det) aims to identify 3D objects from a single RGB image. However, existing methods often assume training and test data follow the same distribution, which may not hold in real-world test scenarios. To address the out-of-distribution (OOD) problems, we explore a new adaptation paradigm for Mono 3Det, termed Fully Test-time Adaptation. It aims to adapt a well-tr&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.19682v1-abstract-full').style.display = 'inline'; document.getElementById('2405.19682v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.19682v1-abstract-full" style="display: none;"> Monocular 3D object detection (Mono 3Det) aims to identify 3D objects from a single RGB image. However, existing methods often assume training and test data follow the same distribution, which may not hold in real-world test scenarios. 
To address the out-of-distribution (OOD) problems, we explore a new adaptation paradigm for Mono 3Det, termed Fully Test-time Adaptation. It aims to adapt a well-trained model to unlabeled test data by handling potential data distribution shifts at test time without access to training data and test labels. However, applying this paradigm in Mono 3Det poses significant challenges due to OOD test data causing a remarkable decline in object detection scores. This decline conflicts with the pre-defined score thresholds of existing detection methods, leading to severe object omissions (i.e., rare positive detections and many false negatives). Consequently, the limited positive detection and plenty of noisy predictions cause test-time adaptation to fail in Mono 3Det. To handle this problem, we propose a novel Monocular Test-Time Adaptation (MonoTTA) method, based on two new strategies. 1) Reliability-driven adaptation: we empirically find that high-score objects are still reliable and the optimization of high-score objects can enhance confidence across all detections. Thus, we devise a self-adaptive strategy to identify reliable objects for model adaptation, which discovers potential objects and alleviates omissions. 2) Noise-guard adaptation: since high-score objects may be scarce, we develop a negative regularization term to exploit the numerous low-score objects via negative learning, preventing overfitting to noise and trivial solutions. Experimental results show that MonoTTA brings significant performance gains for Mono 3Det models in OOD test scenarios, approximately 190% gains by average on KITTI and 198% gains on nuScenes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.19682v1-abstract-full').style.display = 'none'; document.getElementById('2405.19682v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.18688">arXiv:2405.18688</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.18688">pdf</a>, <a href="https://arxiv.org/format/2405.18688">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Efficient Preference-based Reinforcement Learning via Aligned Experience Estimation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Bai%2C+F">Fengshuo Bai</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+R">Rui Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+H">Hongming Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Sijia Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Wen%2C+Y">Ying Wen</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Y">Yaodong Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+B">Bo Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+L">Lei Han</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.18688v1-abstract-short" style="display: inline;"> Preference-based reinforcement learning (PbRL) has shown impressive capabilities in training agents without reward engineering. However, a notable limitation of PbRL is its dependency on substantial human feedback. This dependency stems from the learning loop, which entails accurate reward learning compounded with value/policy learning, necessitating a considerable number of samples. To boost the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.18688v1-abstract-full').style.display = 'inline'; document.getElementById('2405.18688v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.18688v1-abstract-full" style="display: none;"> Preference-based reinforcement learning (PbRL) has shown impressive capabilities in training agents without reward engineering. However, a notable limitation of PbRL is its dependency on substantial human feedback. This dependency stems from the learning loop, which entails accurate reward learning compounded with value/policy learning, necessitating a considerable number of samples. To boost the learning loop, we propose SEER, an efficient PbRL method that integrates label smoothing and policy regularization techniques. Label smoothing reduces overfitting of the reward model by smoothing human preference labels. Additionally, we bootstrap a conservative estimate $\widehat{Q}$ using well-supported state-action pairs from the current replay memory to mitigate overestimation bias and utilize it for policy learning regularization. Our experimental results across a variety of complex tasks, both in online and offline settings, demonstrate that our approach improves feedback efficiency, outperforming state-of-the-art methods by a large margin. 
Ablation studies further reveal that SEER achieves a more accurate Q-function compared to prior work. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.18688v1-abstract-full').style.display = 'none'; document.getElementById('2405.18688v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.16923">arXiv:2405.16923</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.16923">pdf</a>, <a href="https://arxiv.org/format/2405.16923">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> SA-GS: Semantic-Aware Gaussian Splatting for Large Scene Reconstruction with Geometry Constrain </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+B">Butian Xiong</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+X">Xiaoyu Ye</a>, <a href="/search/cs?searchtype=author&amp;query=Tse%2C+T+H+E">Tze Ho Elden Tse</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+K">Kai Han</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shuguang Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zhen Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.16923v2-abstract-short" style="display: inline;"> With the emergence of Gaussian Splats, recent efforts have focused on large-scale scene geometric reconstruction. However, most of these efforts either concentrate on memory reduction or spatial space division, neglecting information in the semantic space. In this paper, we propose a novel method, named SA-GS, for fine-grained 3D geometry reconstruction using semantic-aware 3D Gaussian Splats. Spe&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.16923v2-abstract-full').style.display = 'inline'; document.getElementById('2405.16923v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.16923v2-abstract-full" style="display: none;"> With the emergence of Gaussian Splats, recent efforts have focused on large-scale scene geometric reconstruction. However, most of these efforts either concentrate on memory reduction or spatial space division, neglecting information in the semantic space. In this paper, we propose a novel method, named SA-GS, for fine-grained 3D geometry reconstruction using semantic-aware 3D Gaussian Splats. Specifically, we leverage prior information stored in large vision models such as SAM and DINO to generate semantic masks. We then introduce a geometric complexity measurement function to serve as soft regularization, guiding the shape of each Gaussian Splat within specific semantic areas. Additionally, we present a method that estimates the expected number of Gaussian Splats in different semantic areas, effectively providing a lower bound for Gaussian Splats in these areas. 
Subsequently, we extract the point cloud using a novel probability density-based extraction method, transforming Gaussian Splats into a point cloud crucial for downstream tasks. Our method also offers the potential for detailed semantic inquiries while maintaining high image-based reconstruction results. We provide extensive experiments on publicly available large-scale scene reconstruction datasets with highly accurate point clouds as ground truth and our novel dataset. Our results demonstrate the superiority of our method over current state-of-the-art Gaussian Splats reconstruction methods by a significant margin in terms of geometric-based measurement metrics. Code and additional results will soon be available on our project page. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.16923v2-abstract-full').style.display = 'none'; document.getElementById('2405.16923v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Might need more comparison, will be add later</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.09892">arXiv:2405.09892</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.09892">pdf</a>, <a href="https://arxiv.org/format/2405.09892">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Balancing Similarity and Complementarity for Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yan%2C+K">Kunda Yan</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Sen Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Wuerkaixi%2C+A">Abudukelimu Wuerkaixi</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jingfeng Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+B">Bo Han</a>, <a href="/search/cs?searchtype=author&amp;query=Niu%2C+G">Gang Niu</a>, <a href="/search/cs?searchtype=author&amp;query=Sugiyama%2C+M">Masashi Sugiyama</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+C">Changshui Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.09892v1-abstract-short" style="display: inline;"> In mobile and IoT systems, Federated Learning (FL) is increasingly important for effectively using data while maintaining user privacy. One key challenge in FL is managing statistical heterogeneity, such as non-i.i.d. data, arising from numerous clients and diverse data sources. This requires strategic cooperation, often with clients having similar characteristics. 
However, we are interested in a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.09892v1-abstract-full').style.display = 'inline'; document.getElementById('2405.09892v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.09892v1-abstract-full" style="display: none;"> In mobile and IoT systems, Federated Learning (FL) is increasingly important for effectively using data while maintaining user privacy. One key challenge in FL is managing statistical heterogeneity, such as non-i.i.d. data, arising from numerous clients and diverse data sources. This requires strategic cooperation, often with clients having similar characteristics. However, we are interested in a fundamental question: does achieving optimal cooperation necessarily entail cooperating with the most similar clients? Typically, significant model performance improvements are often realized not by partnering with the most similar models, but through leveraging complementary data. Our theoretical and empirical analyses suggest that optimal cooperation is achieved by enhancing complementarity in feature distribution while restricting the disparity in the correlation between features and targets. Accordingly, we introduce a novel framework, \texttt{FedSaC}, which balances similarity and complementarity in FL cooperation. Our framework aims to approximate an optimal cooperation network for each client by optimizing a weighted sum of model similarity and feature complementarity. The strength of \texttt{FedSaC} lies in its adaptability to various levels of data heterogeneity and multimodal scenarios. Our comprehensive unimodal and multimodal experiments demonstrate that \texttt{FedSaC} markedly surpasses other state-of-the-art FL methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.09892v1-abstract-full').style.display = 'none'; document.getElementById('2405.09892v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.05738">arXiv:2405.05738</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.05738">pdf</a>, <a href="https://arxiv.org/format/2405.05738">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> End-to-End Generative Semantic Communication Powered by Shared Semantic Knowledge Base </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+S">Shuling Li</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+Y">Yaping Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jinbei Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Cai%2C+K">Kechao Cai</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shuguang Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+X">Xiaodong Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.05738v1-abstract-short" style="display: inline;"> Semantic communication has drawn substantial attention as a promising paradigm to achieve effective and intelligent communications. However, efficient image semantic communication encounters challenges with a lower testing compression ratio (CR) compared to the training phase. To tackle this issue, we propose an innovative semantic knowledge base (SKB)-enabled generative semantic communication sys&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.05738v1-abstract-full').style.display = 'inline'; document.getElementById('2405.05738v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.05738v1-abstract-full" style="display: none;"> Semantic communication has drawn substantial attention as a promising paradigm to achieve effective and intelligent communications. However, efficient image semantic communication encounters challenges with a lower testing compression ratio (CR) compared to the training phase. To tackle this issue, we propose an innovative semantic knowledge base (SKB)-enabled generative semantic communication system for image classification and image generation tasks. Specifically, a lightweight SKB, comprising class-level information, is exploited to guide the semantic communication process, which enables us to transmit only the relevant indices. This approach promotes the completion of the image classification task at the source end and significantly reduces the transmission load. Meanwhile, the category-level knowledge in the SKB facilitates the image generation task by allowing controllable generation, making it possible to generate favorable images in resource-constrained scenarios. Additionally, semantic accuracy is introduced as a new metric to validate the performance of semantic transmission powered by the SKB. Evaluation results indicate that the proposed method outperforms the benchmarks and achieves superior performance with minimal transmission overhead, especially in the low SNR regime. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.05738v1-abstract-full').style.display = 'none'; document.getElementById('2405.05738v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.03131">arXiv:2405.03131</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.03131">pdf</a>, <a href="https://arxiv.org/format/2405.03131">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> WDMoE: Wireless Distributed Large Language Models with Mixture of Experts </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xue%2C+N">Nan Xue</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+Y">Yaping Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zhiyong Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Tao%2C+M">Meixia Tao</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+X">Xiaodong Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Qian%2C+L">Liang Qian</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shuguang Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+P">Ping Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.03131v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) have achieved significant success in various natural language processing tasks, but how wireless communications can support LLMs has not been extensively studied. In this paper, we propose a wireless distributed LLMs paradigm based on Mixture of Experts (MoE), named WDMoE, deploying LLMs collaboratively across edge servers of base station (BS) and mobile devices in the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.03131v1-abstract-full').style.display = 'inline'; document.getElementById('2405.03131v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.03131v1-abstract-full" style="display: none;"> Large Language Models (LLMs) have achieved significant success in various natural language processing tasks, but how wireless communications can support LLMs has not been extensively studied. In this paper, we propose a wireless distributed LLMs paradigm based on Mixture of Experts (MoE), named WDMoE, deploying LLMs collaboratively across edge servers of base station (BS) and mobile devices in the wireless communications system. 
Specifically, we decompose the MoE layer in LLMs by deploying the gating network and the preceding neural network layer at BS, while distributing the expert networks across the devices. This arrangement leverages the parallel capabilities of expert networks on distributed devices. Moreover, to overcome the instability of wireless communications, we design an expert selection policy by taking into account both the performance of the model and the end-to-end latency, which includes both transmission delay and inference delay. Evaluations conducted across various LLMs and multiple datasets demonstrate that WDMoE not only outperforms existing models, such as Llama 2 with 70 billion parameters, but also significantly reduces end-to-end latency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.03131v1-abstract-full').style.display = 'none'; document.getElementById('2405.03131v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">submitted to IEEE conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.00736">arXiv:2405.00736</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.00736">pdf</a>, <a href="https://arxiv.org/format/2405.00736">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Joint Signal Detection and Automatic Modulation Classification via Deep Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xing%2C+H">Huijun Xing</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xuhui Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Chang%2C+S">Shuo Chang</a>, <a href="/search/cs?searchtype=author&amp;query=Ren%2C+J">Jinke Ren</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zixun Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+J">Jie Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shuguang Cui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.00736v1-abstract-short" style="display: inline;"> Signal detection and modulation classification are two crucial tasks in various wireless communication systems. 
Different from prior works that investigate them independently, this paper studies the joint signal detection and automatic modulation classification (AMC) by considering a realistic and complex scenario, in which multiple signals with different modulation schemes coexist at different ca&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.00736v1-abstract-full').style.display = 'inline'; document.getElementById('2405.00736v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.00736v1-abstract-full" style="display: none;"> Signal detection and modulation classification are two crucial tasks in various wireless communication systems. Different from prior works that investigate them independently, this paper studies the joint signal detection and automatic modulation classification (AMC) by considering a realistic and complex scenario, in which multiple signals with different modulation schemes coexist at different carrier frequencies. We first generate a coexisting RADIOML dataset (CRML23) to facilitate the joint design. Different from the publicly available AMC dataset, which ignores the signal detection step and contains only one signal, our synthetic dataset covers the more realistic multiple-signal coexisting scenario. Then, we present a joint framework for detection and classification (JDM) for such a multiple-signal coexisting environment, which consists of two modules for signal detection and AMC, respectively. In particular, these two modules are interconnected using a designated data structure called &#34;proposal&#34;. Finally, we conduct extensive simulations over the newly developed dataset, which demonstrate the effectiveness of our designs. Our code and dataset are now available as open-source (https://github.com/Singingkettle/ChangShuoRadioData). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.00736v1-abstract-full').style.display = 'none'; document.getElementById('2405.00736v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.17845">arXiv:2404.17845</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.17845">pdf</a>, <a href="https://arxiv.org/format/2404.17845">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Instance-free Text to Point Cloud Localization with Relative Position Awareness </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+L">Lichao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Yuan%2C+Z">Zhihao Yuan</a>, <a href="/search/cs?searchtype=author&amp;query=Ren%2C+J">Jinke Ren</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shuguang Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zhen Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.17845v1-abstract-short" style="display: inline;"> Text-to-point-cloud cross-modal localization is an emerging vision-language task critical for future robot-human collaboration. It seeks to localize a position from a city-scale point cloud scene based on a few natural language instructions. In this paper, we address two key limitations of existing approaches: 1) their reliance on ground-truth instances as input; and 2) their neglect of the relati&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.17845v1-abstract-full').style.display = 'inline'; document.getElementById('2404.17845v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.17845v1-abstract-full" style="display: none;"> Text-to-point-cloud cross-modal localization is an emerging vision-language task critical for future robot-human collaboration. It seeks to localize a position from a city-scale point cloud scene based on a few natural language instructions. In this paper, we address two key limitations of existing approaches: 1) their reliance on ground-truth instances as input; and 2) their neglect of the relative positions among potential instances. Our proposed model follows a two-stage pipeline, including a coarse stage for text-cell retrieval and a fine stage for position estimation. In both stages, we introduce an instance query extractor, in which the cells are encoded by a 3D sparse convolution U-Net to generate the multi-scale point cloud features, and a set of queries iteratively attend to these features to represent instances. In the coarse stage, a row-column relative position-aware self-attention (RowColRPA) module is designed to capture the spatial relations among the instance queries. In the fine stage, a multi-modal relative position-aware cross-attention (RPCA) module is developed to fuse the text and point cloud features along with spatial relations for improving fine position estimation. Experiment results on the KITTI360Pose dataset demonstrate that our model achieves competitive performance with the state-of-the-art models without taking ground-truth instances as input. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.17845v1-abstract-full').style.display = 'none'; document.getElementById('2404.17845v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 10 figures, conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.16336">arXiv:2404.16336</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.16336">pdf</a>, <a href="https://arxiv.org/format/2404.16336">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> FedStyle: Style-Based Federated Learning Crowdsourcing Framework for Art Commissions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ran%2C+C">Changjuan Ran</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+Y">Yeting Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+F">Fang Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shenglan Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+Y">Yunfan Ye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.16336v1-abstract-short" style="display: inline;"> The unique artistic style is crucial to artists&#39; occupational competitiveness, yet prevailing Art Commission Platforms rarely support style-based retrieval. Meanwhile, the fast-growing generative AI techniques aggravate artists&#39; concerns about releasing personal artworks to public platforms. To achieve artistic style-based retrieval without exposing personal artworks, we propose FedStyle, a style-&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.16336v1-abstract-full').style.display = 'inline'; document.getElementById('2404.16336v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.16336v1-abstract-full" style="display: none;"> The unique artistic style is crucial to artists&#39; occupational competitiveness, yet prevailing Art Commission Platforms rarely support style-based retrieval. Meanwhile, the fast-growing generative AI techniques aggravate artists&#39; concerns about releasing personal artworks to public platforms. To achieve artistic style-based retrieval without exposing personal artworks, we propose FedStyle, a style-based federated learning crowdsourcing framework. It allows artists to train local style models and share model parameters rather than artworks for collaboration. However, most artists possess a unique artistic style, resulting in severe model drift among them. 
FedStyle addresses such extreme data heterogeneity by having artists learn their abstract style representations and align with the server, rather than merely aggregating model parameters lacking semantics. Besides, we introduce contrastive learning to meticulously construct the style representation space, pulling artworks with similar styles closer and keeping different ones apart in the embedding space. Extensive experiments on the proposed datasets demonstrate the superiority of FedStyle. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.16336v1-abstract-full').style.display = 'none'; document.getElementById('2404.16336v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ICME 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.16152">arXiv:2404.16152</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.16152">pdf</a>, <a href="https://arxiv.org/ps/2404.16152">ps</a>, <a href="https://arxiv.org/format/2404.16152">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Rethinking Grant-Free Protocol in mMTC </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+M">Minhao Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+Y">Yifei Sun</a>, <a href="/search/cs?searchtype=author&amp;query=You%2C+L">Lizhao You</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zhaorui Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Ya-Feng Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shuguang Cui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.16152v1-abstract-short" style="display: inline;"> This paper revisits the identity detection problem under the current grant-free protocol in massive machine-type communications (mMTC) by asking the following question: for stable identity detection performance, is it enough to permit active devices to transmit preambles without any handshaking with the base station (BS)? 
Specifically, in the current grant-free protocol, the BS blindly allocates a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.16152v1-abstract-full').style.display = 'inline'; document.getElementById('2404.16152v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.16152v1-abstract-full" style="display: none;"> This paper revisits the identity detection problem under the current grant-free protocol in massive machine-type communications (mMTC) by asking the following question: for stable identity detection performance, is it enough to permit active devices to transmit preambles without any handshaking with the base station (BS)? Specifically, in the current grant-free protocol, the BS blindly allocates a fixed length of preamble to devices for identity detection as it lacks the prior information on the number of active devices $K$. However, in practice, $K$ varies dynamically over time, resulting in degraded identity detection performance especially when $K$ is large. Consequently, the current grant-free protocol fails to ensure stable identity detection performance. To address this issue, we propose a two-stage communication protocol which consists of estimation of $K$ in Phase I and detection of identities of active devices in Phase II. The preamble length for identity detection in Phase II is dynamically allocated based on the estimated $K$ in Phase I through a table lookup manner such that the identity detection performance could always be better than a predefined threshold. In addition, we design an algorithm for estimating $K$ in Phase I, and exploit the estimated $K$ to reduce the computational complexity of the identity detector in Phase II. Numerical results demonstrate the effectiveness of the proposed two-stage communication protocol and algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.16152v1-abstract-full').style.display = 'none'; document.getElementById('2404.16152v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to IEEE for possible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.08509">arXiv:2404.08509</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.08509">pdf</a>, <a href="https://arxiv.org/format/2404.08509">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Efficient Interactive LLM Serving with Proxy Model-based Sequence Length Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Qiu%2C+H">Haoran Qiu</a>, <a href="/search/cs?searchtype=author&amp;query=Mao%2C+W">Weichao Mao</a>, <a href="/search/cs?searchtype=author&amp;query=Patke%2C+A">Archit Patke</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shengkun Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Saurabh Jha</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+C">Chen Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Franke%2C+H">Hubertus Franke</a>, <a href="/search/cs?searchtype=author&amp;query=Kalbarczyk%2C+Z+T">Zbigniew T. Kalbarczyk</a>, <a href="/search/cs?searchtype=author&amp;query=Ba%C5%9Far%2C+T">Tamer Başar</a>, <a href="/search/cs?searchtype=author&amp;query=Iyer%2C+R+K">Ravishankar K. Iyer</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.08509v1-abstract-short" style="display: inline;"> Large language models (LLMs) have been driving a new wave of interactive AI applications across numerous domains. However, efficiently serving LLM inference requests is challenging due to their unpredictable execution times originating from the autoregressive nature of generative models. Existing LLM serving systems exploit first-come-first-serve (FCFS) scheduling, suffering from head-of-line bloc&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.08509v1-abstract-full').style.display = 'inline'; document.getElementById('2404.08509v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.08509v1-abstract-full" style="display: none;"> Large language models (LLMs) have been driving a new wave of interactive AI applications across numerous domains. However, efficiently serving LLM inference requests is challenging due to their unpredictable execution times originating from the autoregressive nature of generative models. Existing LLM serving systems exploit first-come-first-serve (FCFS) scheduling, suffering from head-of-line blocking issues. To address the non-deterministic nature of LLMs and enable efficient interactive LLM serving, we present a speculative shortest-job-first (SSJF) scheduler that uses a light proxy model to predict LLM output sequence lengths. 
Our open-source SSJF implementation does not require changes to memory management or batching strategies. Evaluations on real-world datasets and production workload traces show that SSJF reduces average job completion times by 30.5-39.6% and increases throughput by 2.2-3.6x compared to FCFS schedulers, across no batching, dynamic batching, and continuous batching settings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.08509v1-abstract-full').style.display = 'none'; document.getElementById('2404.08509v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at AIOps&#39;24</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.00836">arXiv:2404.00836</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.00836">pdf</a>, <a href="https://arxiv.org/ps/2404.00836">ps</a>, <a href="https://arxiv.org/format/2404.00836">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Rethinking Resource Management in Edge Learning: A Joint Pre-training and Fine-tuning Design Paradigm </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lyu%2C+Z">Zhonghao Lyu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yuchen Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+G">Guangxu Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+J">Jie Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Poor%2C+H+V">H. Vincent Poor</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+S">Shuguang Cui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.00836v1-abstract-short" style="display: inline;"> In some applications, edge learning is experiencing a shift in focusing from conventional learning from scratch to new two-stage learning unifying pre-training and task-specific fine-tuning. This paper considers the problem of joint communication and computation resource management in a two-stage edge learning system. 
In this system, model pre-training is first conducted at an edge server via cent&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.00836v1-abstract-full').style.display = 'inline'; document.getElementById('2404.00836v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.00836v1-abstract-full" style="display: none;"> In some applications, edge learning is experiencing a shift in focusing from conventional learning from scratch to new two-stage learning unifying pre-training and task-specific fine-tuning. This paper considers the problem of joint communication and computation resource management in a two-stage edge learning system. In this system, model pre-training is first conducted at an edge server via centralized learning on local pre-stored general data, and then task-specific fine-tuning is performed at edge devices based on the pre-trained model via federated edge learning. For the two-stage learning model, we first analyze the convergence behavior (in terms of the average squared gradient norm bound), which characterizes the impacts of various system parameters such as the number of learning rounds and batch sizes in the two stages on the convergence rate. Based on our analytical results, we then propose a joint communication and computation resource management design to minimize an average squared gradient norm bound, subject to constraints on the transmit power, overall system energy consumption, and training delay. The decision variables include the number of learning rounds, batch sizes, clock frequencies, and transmit power control for both pre-training and fine-tuning stages. Finally, numerical results are provided to evaluate the effectiveness of our proposed design. It is shown that the proposed joint resource management over the pre-training and fine-tuning stages well balances the system performance trade-off among the training accuracy, delay, and energy consumption. The proposed design is also shown to effectively leverage the inherent trade-off between pre-training and fine-tuning, which arises from the differences in data distribution between pre-stored general data versus real-time task-specific data, thus efficiently optimizing overall system performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.00836v1-abstract-full').style.display = 'none'; document.getElementById('2404.00836v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. 
arXiv:2404.00269  [pdf, other]  https://arxiv.org/abs/2404.00269
Subjects: cs.CV
IPoD: Implicit Field Learning with Point Diffusion for Generalizable 3D Object Reconstruction from Single RGB-D Images
Authors: Yushuang Wu, Luyue Shi, Junhao Cai, Weihao Yuan, Lingteng Qiu, Zilong Dong, Liefeng Bo, Shuguang Cui, Xiaoguang Han
Abstract: Generalizable 3D object reconstruction from single-view RGB-D images remains a challenging task, particularly with real-world data. Current state-of-the-art methods develop Transformer-based implicit field learning, necessitating an intensive learning paradigm that requires dense query-supervision uniformly sampled throughout the entire space. We propose a novel approach, IPoD, which harmonizes implicit field learning with point diffusion. This approach treats the query points for implicit field learning as a noisy point cloud for iterative denoising, allowing for their dynamic adaptation to the target object shape. Such adaptive query points harness diffusion learning's capability for coarse shape recovery and also enhance the implicit representation's ability to delineate finer details. Besides, an additional self-conditioning mechanism is designed to use implicit predictions as the guidance of diffusion learning, leading to a cooperative system. Experiments conducted on the CO3D-v2 dataset affirm the superiority of IPoD, achieving a 7.8% improvement in F-score and a 28.6% improvement in Chamfer distance over existing methods. The generalizability of IPoD is also demonstrated on the MVImgNet dataset. Our project page is at https://yushuang-wu.github.io/IPoD.
Submitted 30 March, 2024; originally announced April 2024.
Comments: CVPR 2024
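As a toy illustration of the idea the abstract describes, treating query points as a noisy point cloud that is iteratively refined toward the target shape, the snippet below "denoises" random 2D points toward a known unit circle. In IPoD the per-step update is predicted by a learned diffusion model conditioned on the observation and coupled with the implicit field; this sketch does not attempt to reproduce that model.

```python
import random

# Toy sketch only: the target shape (a unit circle) is known here, so the update
# can be computed analytically; a learned model would predict it instead.
def denoise_step(points, step=0.2):
    refined = []
    for x, y in points:
        r = (x * x + y * y) ** 0.5 or 1e-6
        tx, ty = x / r, y / r                      # closest point on the unit circle
        refined.append((x + step * (tx - x), y + step * (ty - y)))
    return refined

points = [(random.uniform(-2, 2), random.uniform(-2, 2)) for _ in range(256)]
for _ in range(25):                                 # iterative refinement of the query points
    points = denoise_step(points)
```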
arXiv:2404.00014  [pdf]  https://arxiv.org/abs/2404.00014
Subjects: physics.chem-ph; cs.AI; q-bio.BM
Deep Geometry Handling and Fragment-wise Molecular 3D Graph Generation
Authors: Odin Zhang, Yufei Huang, Shichen Cheng, Mengyao Yu, Xujun Zhang, Haitao Lin, Yundian Zeng, Mingyang Wang, Zhenxing Wu, Huifeng Zhao, Zaixi Zhang, Chenqing Hua, Yu Kang, Sunliang Cui, Peichen Pan, Chang-Yu Hsieh, Tingjun Hou
Abstract: Most earlier 3D structure-based molecular generation approaches follow an atom-wise paradigm, incrementally adding atoms to a partially built molecular fragment within protein pockets. These methods, while effective in designing tightly bound ligands, often overlook other essential properties such as synthesizability. The fragment-wise generation paradigm offers a promising solution. However, a common challenge across both atom-wise and fragment-wise methods lies in their limited ability to co-design plausible chemical and geometrical structures, resulting in distorted conformations. In response to this challenge, we introduce the Deep Geometry Handling protocol, a more abstract design that extends the design focus beyond the model architecture. Through a comprehensive review of existing geometry-related models and their protocols, we propose a novel hybrid strategy, culminating in the development of FragGen, a geometry-reliable, fragment-wise molecular generation method. FragGen marks a significant leap forward in the quality of generated geometry and the synthetic accessibility of molecules. The efficacy of FragGen is further validated by its successful application in designing type II kinase inhibitors at the nanomolar level.
Submitted 15 March, 2024; originally announced April 2024.

arXiv:2403.17652  [pdf, ps, other]  https://arxiv.org/abs/2403.17652
Subjects: eess.SP; cs.IT
Leveraging A Variety of Anchors in Cellular Network for Ubiquitous Sensing
Authors: Liang Liu, Shuowen Zhang, Shuguang Cui
Abstract: Integrated sensing and communication (ISAC) has recently attracted tremendous attention from both academia and industry, being envisioned as a key part of the standards for the sixth-generation (6G) cellular network. A key challenge of 6G-oriented ISAC lies in how to perform ubiquitous sensing based on communication signals and devices. Previous works have made great progress in studying signal waveform designs that lead to the optimal communication-sensing performance tradeoff. In this article, we focus on issues arising from the exploitation of communication devices for sensing in the 6G network. In particular, we discuss how to leverage the various nodes available in the cellular network as anchors to perform ubiquitous sensing. On one hand, base stations (BSs) will be the most important anchors in the future 6G ISAC network, since they can generate/process radio signals with high range/angle resolution and their positions are precisely known. Correspondingly, we first study the BS-based sensing technique. On the other hand, BSs alone may not enable ubiquitous sensing, since they cannot cover all places with strong line-of-sight (LOS) links. This motivates us to investigate the possibility of using other nodes with higher density in the network to act as anchors. Along this line, we are interested in two types of new anchors - user equipments (UEs) and reconfigurable intelligent surfaces (RISs). This paper sheds light on the opportunities and challenges brought by UE-assisted sensing and RIS-assisted sensing. Our goal is to devise a novel 6G-oriented sensing architecture in which BSs, UEs, and RISs can work together to provide ubiquitous sensing services.
Submitted 26 March, 2024; originally announced March 2024.
Comments: to appear in IEEE Communications Magazine

arXiv:2403.16353  [pdf, other]  https://arxiv.org/abs/2403.16353
Subjects: cs.IT; eess.SP
Energy-Efficient Hybrid Beamforming with Dynamic On-off Control for Integrated Sensing, Communications, and Powering
Authors: Zeyu Hao, Yuan Fang, Xianghao Yu, Jie Xu, Ling Qiu, Lexi Xu, Shuguang Cui
Abstract: This paper investigates the energy-efficient hybrid beamforming design for a multi-functional integrated sensing, communications, and powering (ISCAP) system. In this system, a base station (BS) with a hybrid analog-digital (HAD) architecture sends unified wireless signals to communicate with multiple information receivers (IRs), sense multiple point targets, and wirelessly charge multiple energy receivers (ERs) at the same time. To facilitate the energy-efficient design, we present a novel HAD architecture for the BS transmitter, which allows dynamic on-off control of its radio frequency (RF) chains and analog phase shifters (PSs) through a switch network. We also consider a practical and comprehensive power consumption model for the BS, by taking into account the power-dependent non-linear power amplifier (PA) efficiency and the on-off non-transmission power consumption of the RF chains and PSs.
We jointly design the hybrid beamforming and dynamic on-off control at the BS, aiming to minimize its total power consumption while guaranteeing the performance requirements on communication rates, sensing Cramér-Rao bound (CRB), and harvested power levels. The formulation also takes into consideration the per-antenna transmit power constraint and the constant modulus constraints for the analog beamformer at the BS. The resulting optimization problem for ISCAP is highly non-convex. Please refer to the paper for the complete abstract.
Submitted 24 March, 2024; originally announced March 2024.
Comments: 13 pages, 6 figures, submitted to IEEE Transactions on Communications

arXiv:2403.13535  [pdf, other]  https://arxiv.org/abs/2403.13535
Subjects: cs.CV
IDAdapter: Learning Mixed Features for Tuning-Free Personalization of Text-to-Image Models
Authors: Siying Cui, Jia Guo, Xiang An, Jiankang Deng, Yongle Zhao, Xinyu Wei, Ziyong Feng
Abstract: Leveraging Stable Diffusion for the generation of personalized portraits has emerged as a powerful and noteworthy tool, enabling users to create high-fidelity, custom character avatars based on their specific prompts. However, existing personalization methods face challenges, including test-time fine-tuning, the requirement of multiple input images, low preservation of identity, and limited diversity in generated outcomes. To overcome these challenges, we introduce IDAdapter, a tuning-free approach that enhances the diversity and identity preservation in personalized image generation from a single face image. IDAdapter integrates a personalized concept into the generation process through a combination of textual and visual injections and a face identity loss. During the training phase, we incorporate mixed features from multiple reference images of a specific identity to enrich identity-related content details, guiding the model to generate images with more diverse styles, expressions, and angles compared to previous works. Extensive evaluations demonstrate the effectiveness of our method, achieving both diversity and identity fidelity in generated images.
Submitted 20 March, 2024; v1 submitted 20 March, 2024; originally announced March 2024.
Comments: 14 pages, 15 figures

arXiv:2403.11812  [pdf, other]  https://arxiv.org/abs/2403.11812
Subjects: cs.CV
Aerial Lifting: Neural Urban Semantic and Building Instance Lifting from Aerial Imagery
Authors: Yuqi Zhang, Guanying Chen, Jiaxing Chen, Shuguang Cui
Abstract: We present a neural radiance field method for urban-scale semantic and building-level instance segmentation from aerial images by lifting noisy 2D labels to 3D. This is a challenging problem due to two primary reasons. Firstly, objects in urban aerial images exhibit substantial variations in size, including buildings, cars, and roads, which pose a significant challenge for accurate 2D segmentation. Secondly, the 2D labels generated by existing segmentation methods suffer from the multi-view inconsistency problem, especially in the case of aerial images, where each image captures only a small portion of the entire scene. To overcome these limitations, we first introduce a scale-adaptive semantic label fusion strategy that enhances the segmentation of objects of varying sizes by combining labels predicted from different altitudes, harnessing the novel-view synthesis capabilities of NeRF. We then introduce a novel cross-view instance label grouping strategy based on the 3D scene representation to mitigate the multi-view inconsistency problem in the 2D instance labels. Furthermore, we exploit multi-view reconstructed depth priors to improve the geometric quality of the reconstructed radiance field, resulting in enhanced segmentation results. Experiments on multiple real-world urban-scale datasets demonstrate that our approach outperforms existing methods, highlighting its effectiveness.
Submitted 18 March, 2024; originally announced March 2024.
Comments: CVPR 2024: https://zyqz97.github.io/Aerial_Lifting/

arXiv:2403.11809  [pdf, other]  https://arxiv.org/abs/2403.11809
Subjects: cs.IT; eess.SP
Sensing-Enhanced Channel Estimation for Near-Field XL-MIMO Systems
Authors: Shicong Liu, Xianghao Yu, Zhen Gao, Jie Xu, Derrick Wing Kwan Ng, Shuguang Cui
Abstract: Future sixth-generation (6G) systems are expected to leverage extremely large-scale multiple-input multiple-output (XL-MIMO) technology, which significantly expands the range of the near-field region. The spherical wavefront characteristics in the near field introduce additional degrees of freedom (DoFs), namely distance and angle, into the channel model, which leads to unique challenges in channel estimation (CE). In this paper, we propose a new sensing-enhanced uplink CE scheme for near-field XL-MIMO, which notably reduces the required quantity of baseband samples and the dictionary size. In particular, we first propose a sensing method that can be accomplished in a single time slot.
It employs power sensors embedded within the antenna elements to measure the received power pattern rather than baseband samples. A time inversion algorithm is then proposed to precisely estimate the locations of users and scatterers, which offers a substantially lower computational complexity. Based on the estimated locations from sensing, a novel dictionary is then proposed by considering the eigen-problem based on the near-field transmission model, which facilitates efficient near-field CE with less baseband sampling and a more lightweight dictionary. Moreover, we derive the general form of the eigenvectors associated with the near-field channel matrix, revealing their noteworthy connection to the discrete prolate spheroidal sequence (DPSS). Simulation results unveil that the proposed time inversion algorithm achieves accurate localization with power measurements only, and remarkably outperforms various widely adopted algorithms in terms of computational complexity. Furthermore, the proposed eigen-dictionary considerably improves the accuracy of CE with a compact dictionary size and a drastic reduction in baseband samples by up to 66%.
Submitted 5 September, 2024; v1 submitted 18 March, 2024; originally announced March 2024.
Comments: 14 pages, 10 figures

arXiv:2403.04086  [pdf, other]  https://arxiv.org/abs/2403.04086
Subjects: cs.LG
Automated Multi-Task Learning for Joint Disease Prediction on Electronic Health Records
Authors: Suhan Cui, Prasenjit Mitra
Abstract: In the realm of big data and digital healthcare, Electronic Health Records (EHR) have become a rich source of information with the potential to improve patient care and medical research. In recent years, machine learning models have proliferated for analyzing EHR data to predict patients' future health conditions. Among them, some studies advocate for multi-task learning (MTL) to jointly predict multiple target diseases, improving prediction performance over single-task learning. Nevertheless, current MTL frameworks for EHR data have significant limitations due to their heavy reliance on human experts to identify task groups for joint training and to design model architectures. To reduce human intervention and improve the framework design, we propose an automated approach named AutoDP, which can search for the optimal configuration of task grouping and architectures simultaneously. To tackle the vast joint search space encompassing task combinations and architectures, we employ surrogate model-based optimization, enabling us to efficiently discover the optimal solution. Experimental results on real-world EHR data demonstrate the efficacy of the proposed AutoDP framework. It achieves significant performance improvements over both hand-crafted and automated state-of-the-art methods, while maintaining a feasible search cost. Source code: https://github.com/SH-Src/AutoDP
Submitted 8 October, 2024; v1 submitted 6 March, 2024; originally announced March 2024.
Comments: Accepted by NeurIPS 2024
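The entry above mentions surrogate model-based optimization over a joint space of task groupings and architectures. The sketch below shows the generic pattern such methods follow: fit a cheap surrogate on the configurations evaluated so far and use it to choose the next configuration to train. The binary search space, nearest-neighbour surrogate, and scoring function here are made-up stand-ins, not AutoDP's actual design.

```python
import random

def evaluate(config):
    # Stand-in for an expensive training run that returns validation performance.
    return -abs(sum(config) - 3) + random.uniform(0, 0.1)

def surrogate_predict(history, config):
    # Toy surrogate: reuse the score of the most similar configuration seen so far.
    if not history:
        return 0.0
    nearest = min(history, key=lambda h: sum(a != b for a, b in zip(h[0], config)))
    return nearest[1]

def surrogate_search(num_tasks=6, rounds=20, pool_size=64):
    history = []                                     # (configuration, measured score)
    for _ in range(rounds):
        pool = [tuple(random.randint(0, 1) for _ in range(num_tasks))
                for _ in range(pool_size)]
        best_guess = max(pool, key=lambda c: surrogate_predict(history, c))
        history.append((best_guess, evaluate(best_guess)))  # only this candidate is trained
    return max(history, key=lambda h: h[1])

print(surrogate_search())
```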
arXiv:2403.00829  [pdf, other]  https://arxiv.org/abs/2403.00829
Subjects: cs.AI; cs.CL
TroubleLLM: Align to Red Team Expert
Authors: Zhuoer Xu, Jianping Zhang, Shiwen Cui, Changhua Meng, Weiqiang Wang
Abstract: Large Language Models (LLMs) have become the state-of-the-art solution for a variety of natural language tasks and are integrated into real-world applications. However, LLMs can be potentially harmful in manifesting undesirable safety issues like social biases and toxic content. It is imperative to assess these safety issues before deployment. However, the quality and diversity of test prompts generated by existing methods are still far from satisfactory. Not only are these methods labor-intensive and costly, but they also lack controllability of test prompt generation for the specific testing domain of LLM applications. With the idea of LLM for LLM testing, we propose the first LLM, called TroubleLLM, to generate controllable test prompts on LLM safety issues. Extensive experiments and human evaluation illustrate the superiority of TroubleLLM in generation quality and generation controllability.
Submitted 27 February, 2024; originally announced March 2024.

arXiv:2402.19009  [pdf, other]  https://arxiv.org/abs/2402.19009
Subjects: cs.LG; cs.AI
Unified Generation, Reconstruction, and Representation: Generalized Diffusion with Adaptive Latent Encoding-Decoding
Authors: Guangyi Liu, Yu Wang, Zeyu Feng, Qiyu Wu, Liping Tang, Yuan Gao, Zhen Li, Shuguang Cui, Julian McAuley, Zichao Yang, Eric P. Xing, Zhiting Hu
Abstract: The vast applications of deep generative models are anchored in three core capabilities -- generating new instances, reconstructing inputs, and learning compact representations -- across various data types, such as discrete text/protein sequences and continuous images. Existing model families, like variational autoencoders (VAEs), generative adversarial networks (GANs), autoregressive models, and (latent) diffusion models, generally excel in specific capabilities and data types but fall short in others. We introduce Generalized Encoding-Decoding Diffusion Probabilistic Models (EDDPMs), which integrate the core capabilities for broad applicability and enhanced performance. EDDPMs generalize the Gaussian noising-denoising in standard diffusion by introducing parameterized encoding-decoding. Crucially, EDDPMs are compatible with the well-established diffusion model objective and training recipes, allowing effective learning of the encoder-decoder parameters jointly with diffusion. By choosing an appropriate encoder/decoder (e.g., large language models), EDDPMs naturally apply to different data types. Extensive experiments on text, proteins, and images demonstrate the flexibility to handle diverse data and tasks and the strong improvement over various existing models.
Submitted 5 June, 2024; v1 submitted 29 February, 2024; originally announced February 2024.
Comments: ICML 2024 camera-ready. Code is available at https://github.com/guangyliu/EDDPM
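The abstract above says the encoder-decoder parameters are learned jointly with the diffusion objective. Below is a schematic PyTorch training step illustrating that general pattern under toy stand-in modules, a toy linear noising schedule, and an added reconstruction term; it is not the EDDPM architecture or objective, only a sketch of what "joint training of encoder, decoder, and denoiser" can look like.

```python
import torch
import torch.nn as nn

# Toy modules standing in for a real encoder/decoder/denoiser (illustrative only).
enc, dec = nn.Linear(32, 16), nn.Linear(16, 32)
denoiser = nn.Sequential(nn.Linear(17, 64), nn.ReLU(), nn.Linear(64, 16))
opt = torch.optim.Adam([*enc.parameters(), *dec.parameters(), *denoiser.parameters()], lr=1e-3)

x = torch.randn(8, 32)                       # a batch of toy data
z0 = enc(x)                                  # parameterized encoding
t = torch.rand(8, 1)                         # diffusion time in [0, 1]
noise = torch.randn_like(z0)
zt = (1 - t) * z0 + t * noise                # toy noising schedule
pred = denoiser(torch.cat([zt, t], dim=1))   # predict the injected noise

opt.zero_grad()
loss = ((pred - noise) ** 2).mean() + ((dec(z0) - x) ** 2).mean()  # denoising + reconstruction
loss.backward()                              # gradients flow into encoder and decoder too
opt.step()
```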
arXiv:2402.16868  [pdf, other]  https://arxiv.org/abs/2402.16868
Subjects: cs.IT; cs.AI
Codebook-enabled Generative End-to-end Semantic Communication Powered by Transformer
Authors: Peigen Ye, Yaping Sun, Shumin Yao, Hao Chen, Xiaodong Xu, Shuguang Cui
Abstract: Codebook-based generative semantic communication is attracting increasing attention, since only indices need to be transmitted when the codebook is shared between transmitter and receiver. However, because the semantic relations among code vectors are not necessarily related to the distance between the corresponding code indices, the performance of a codebook-enabled semantic communication system is susceptible to channel noise. Thus, improving the system's robustness against noise requires careful design. This paper proposes a robust codebook-assisted image semantic communication system, in which the semantic codec and the codebook are first jointly constructed, and a vector-to-index transformer is then designed, guided by the codebook, to eliminate the effects of channel noise and achieve image generation. Thanks to the assistance of the high-quality codebook to the Transformer, the images generated at the receiver outperform those of the compared methods in terms of visual perception. Finally, numerical results and generated images demonstrate the advantages of the generative semantic communication method over JPEG+LDPC and traditional joint source-channel coding (JSCC) methods.
Submitted 5 March, 2024; v1 submitted 22 January, 2024; originally announced February 2024.
Comments: IEEE INFOCOM PerAI6G 2024 (accepted)
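The failure mode this abstract points to, namely that numerically close code indices need not correspond to semantically close code vectors, is easy to see in a bare-bones codebook link like the sketch below. The tiny codebook, the index-error channel, and the example vector are illustrative assumptions; the paper's contribution is the Transformer-based vector-to-index design that makes this step robust, which is not reproduced here.

```python
import random

# Shared codebook (illustrative): only an index is transmitted per feature vector.
codebook = [(0.0, 0.0), (0.0, 1.0), (1.0, 0.0), (1.0, 1.0)]

def encode(vec):
    # Nearest code vector by squared Euclidean distance -> transmit its index.
    return min(range(len(codebook)),
               key=lambda i: sum((v - c) ** 2 for v, c in zip(vec, codebook[i])))

def noisy_index_channel(index, error_prob=0.1):
    # Toy channel: occasionally the received index is corrupted to a random one.
    return random.randrange(len(codebook)) if random.random() < error_prob else index

def decode(index):
    return codebook[index]

feature = (0.9, 0.2)
received = decode(noisy_index_channel(encode(feature)))
print(received)  # usually (1.0, 0.0); a corrupted index may decode to an unrelated vector
```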
