Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 91 results for author: <span class="mathjax">Xiong, Z</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/eess" aria-role="search"> Searching in archive <strong>eess</strong>. <a href="/search/?searchtype=author&query=Xiong%2C+Z">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Xiong, Z"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Xiong%2C+Z&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Xiong, Z"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Xiong%2C+Z&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Xiong%2C+Z&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Xiong%2C+Z&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.06149">arXiv:2503.06149</a> <span> [<a href="https://arxiv.org/pdf/2503.06149">pdf</a>, <a href="https://arxiv.org/format/2503.06149">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Wireless Hallucination in Generative AI-enabled Communications: Concepts, Issues, and Solutions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+X">Xudong Wang</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+L">Lei Feng</a>, <a href="/search/eess?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+R">Ruichen Zhang</a>, <a href="/search/eess?searchtype=author&query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zehui Xiong</a>, <a href="/search/eess?searchtype=author&query=Du%2C+H">Hongyang Du</a>, <a href="/search/eess?searchtype=author&query=Mao%2C+S">Shiwen Mao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.06149v1-abstract-short" style="display: inline;"> Generative AI (GenAI) is driving the intelligence of wireless communications. Due to data limitations, random generation, and dynamic environments, GenAI may generate channel information or optimization strategies that violate physical laws or deviate from actual real-world requirements. We refer to this phenomenon as wireless hallucination, which results in invalid channel information, spectrum w… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.06149v1-abstract-full').style.display = 'inline'; document.getElementById('2503.06149v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.06149v1-abstract-full" style="display: none;"> Generative AI (GenAI) is driving the intelligence of wireless communications. Due to data limitations, random generation, and dynamic environments, GenAI may generate channel information or optimization strategies that violate physical laws or deviate from actual real-world requirements. We refer to this phenomenon as wireless hallucination, which results in invalid channel information, spectrum wastage, and low communication reliability but remains underexplored. To address this gap, this article provides a comprehensive concept of wireless hallucinations in GenAI-driven communications, focusing on hallucination mitigation. Specifically, we first introduce the fundamental, analyze its causes based on the GenAI workflow, and propose mitigation solutions at the data, model, and post-generation levels. Then, we systematically examines representative hallucination scenarios in GenAI-enabled communications and their corresponding solutions. Finally, we propose a novel integrated mitigation solution for GenAI-based channel estimation. At the data level, we establish a channel estimation hallucination dataset and employ generative adversarial networks (GANs)-based data augmentation. Additionally, we incorporate attention mechanisms and large language models (LLMs) to enhance both training and inference performance. Experimental results demonstrate that the proposed hybrid solutions reduce the normalized mean square error (NMSE) by 0.19, effectively reducing wireless hallucinations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.06149v1-abstract-full').style.display = 'none'; document.getElementById('2503.06149v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15414">arXiv:2501.15414</a> <span> [<a href="https://arxiv.org/pdf/2501.15414">pdf</a>, <a href="https://arxiv.org/format/2501.15414">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Semantic Communication with Entropy-and-Channel-Adaptive Rate Control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+W">Weixuan Chen</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yuhao Chen</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+Q">Qianqian Yang</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+C">Chongwen Huang</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Q">Qian Wang</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zehui Xiong</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Z">Zhaoyang Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.15414v1-abstract-short" style="display: inline;"> Traditional wireless image transmission methods struggle to balance rate efficiency and reconstruction quality under varying channel conditions. To address these challenges, we propose a novel semantic communication (SemCom) system that integrates entropy-aware and channel-adaptive mechanisms for wireless image transmission over multi-user multiple-input multiple-output (MU-MIMO) fading channels.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15414v1-abstract-full').style.display = 'inline'; document.getElementById('2501.15414v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.15414v1-abstract-full" style="display: none;"> Traditional wireless image transmission methods struggle to balance rate efficiency and reconstruction quality under varying channel conditions. To address these challenges, we propose a novel semantic communication (SemCom) system that integrates entropy-aware and channel-adaptive mechanisms for wireless image transmission over multi-user multiple-input multiple-output (MU-MIMO) fading channels. Unlike existing approaches, our system dynamically adjusts transmission rates based on the entropy of feature maps, channel state information (CSI), and signal-to-noise ratio (SNR), ensuring optimal resource utilization and robust performance. The system employs feature map pruning, channel attention, spatial attention, and multihead self-attention (MHSA) mechanisms to prioritize critical semantic features and effectively reconstruct images. Experimental results demonstrate that the proposed system outperforms state-of-the-art benchmarks, including BPG+LDPC+4QAM and Deep JSCC, in terms of rate-distortion performance, flexibility, and robustness, particularly under challenging conditions such as low SNR, imperfect CSI, and inter-user interference. This work establishes a strong foundation for adaptive-rate SemCom systems and highlights their potential for real-time, bandwidthintensive applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15414v1-abstract-full').style.display = 'none'; document.getElementById('2501.15414v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10166">arXiv:2411.10166</a> <span> [<a href="https://arxiv.org/pdf/2411.10166">pdf</a>, <a href="https://arxiv.org/format/2411.10166">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Two-Stage Robust Optimal Operation of Distribution Networks using Confidence Level Based Distributionally Information Gap Decision </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zhisheng Xiong</a>, <a href="/search/eess?searchtype=author&query=Zeng%2C+B">Bo Zeng</a>, <a href="/search/eess?searchtype=author&query=Palensky%2C+P">Peter Palensky</a>, <a href="/search/eess?searchtype=author&query=Vergara%2C+P+P">Pedro P. Vergara</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10166v1-abstract-short" style="display: inline;"> This paper presents a confidence level-based distributionally information gap decision theory (CL-DIGDT) framework for the two-stage robust optimal operation of distribution networks, aiming at deriving an optimal operational scheme capable of addressing uncertainties related to renewable energy and load demands. Building on conventional IGDT, the proposed framework utilizes the confidence level t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10166v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10166v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10166v1-abstract-full" style="display: none;"> This paper presents a confidence level-based distributionally information gap decision theory (CL-DIGDT) framework for the two-stage robust optimal operation of distribution networks, aiming at deriving an optimal operational scheme capable of addressing uncertainties related to renewable energy and load demands. Building on conventional IGDT, the proposed framework utilizes the confidence level to capture the asymmetric characteristics of uncertainties and maximize the risk-averse capability of the solution in a probabilistic manner. To account for the probabilistic consideration, the imprecise Dirichlet model is employed to construct the ambiguity sets of uncertainties, reducing reliance on precise probability distributions. Consequently, a two-stage robust optimal operation model for distribution networks using CL-DIGDT is developed. An iterative method is proposed to solve the model and determine the upper and lower bounds of the objective function. Case study demonstrates that the proposed approach yields a more robust and statistically optimized solution with required accuracy compared to existing method, contributing to a reduction in first-stage cost by 0.84%, second-stage average cost by 6.7%, and significantly increasing the reliability of the solution by 8%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10166v1-abstract-full').style.display = 'none'; document.getElementById('2411.10166v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.10155">arXiv:2410.10155</a> <span> [<a href="https://arxiv.org/pdf/2410.10155">pdf</a>, <a href="https://arxiv.org/format/2410.10155">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Tracing Human Stress from Physiological Signals using UWB Radar </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xu%2C+J">Jia Xu</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+T">Teng Xiao</a>, <a href="/search/eess?searchtype=author&query=Lv%2C+P">Pin Lv</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Z">Zhe Chen</a>, <a href="/search/eess?searchtype=author&query=Cai%2C+C">Chao Cai</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Y">Yang Zhang</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zehui Xiong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.10155v1-abstract-short" style="display: inline;"> Stress tracing is an important research domain that supports many applications, such as health care and stress management; and its closest related works are derived from stress detection. However, these existing works cannot well address two important challenges facing stress detection. First, most of these studies involve asking users to wear physiological sensors to detect their stress states, w… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10155v1-abstract-full').style.display = 'inline'; document.getElementById('2410.10155v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.10155v1-abstract-full" style="display: none;"> Stress tracing is an important research domain that supports many applications, such as health care and stress management; and its closest related works are derived from stress detection. However, these existing works cannot well address two important challenges facing stress detection. First, most of these studies involve asking users to wear physiological sensors to detect their stress states, which has a negative impact on the user experience. Second, these studies have failed to effectively utilize multimodal physiological signals, which results in less satisfactory detection results. This paper formally defines the stress tracing problem, which emphasizes the continuous detection of human stress states. A novel deep stress tracing method, named DST, is presented. Note that DST proposes tracing human stress based on physiological signals collected by a noncontact ultrawideband radar, which is more friendly to users when collecting their physiological signals. In DST, a signal extraction module is carefully designed at first to robustly extract multimodal physiological signals from the raw RF data of the radar, even in the presence of body movement. Afterward, a multimodal fusion module is proposed in DST to ensure that the extracted multimodal physiological signals can be effectively fused and utilized. Extensive experiments are conducted on three real-world datasets, including one self-collected dataset and two publicity datasets. Experimental results show that the proposed DST method significantly outperforms all the baselines in terms of tracing human stress states. On average, DST averagely provides a 6.31% increase in detection accuracy on all datasets, compared with the best baselines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10155v1-abstract-full').style.display = 'none'; document.getElementById('2410.10155v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages, 11 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.14113">arXiv:2409.14113</a> <span> [<a href="https://arxiv.org/pdf/2409.14113">pdf</a>, <a href="https://arxiv.org/format/2409.14113">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Accelerated Multi-Contrast MRI Reconstruction via Frequency and Spatial Mutual Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+Q">Qi Chen</a>, <a href="/search/eess?searchtype=author&query=Xing%2C+X">Xiaohan Xing</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Z">Zhen Chen</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zhiwei Xiong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.14113v1-abstract-short" style="display: inline;"> To accelerate Magnetic Resonance (MR) imaging procedures, Multi-Contrast MR Reconstruction (MCMR) has become a prevalent trend that utilizes an easily obtainable modality as an auxiliary to support high-quality reconstruction of the target modality with under-sampled k-space measurements. The exploration of global dependency and complementary information across different modalities is essential fo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14113v1-abstract-full').style.display = 'inline'; document.getElementById('2409.14113v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.14113v1-abstract-full" style="display: none;"> To accelerate Magnetic Resonance (MR) imaging procedures, Multi-Contrast MR Reconstruction (MCMR) has become a prevalent trend that utilizes an easily obtainable modality as an auxiliary to support high-quality reconstruction of the target modality with under-sampled k-space measurements. The exploration of global dependency and complementary information across different modalities is essential for MCMR. However, existing methods either struggle to capture global dependency due to the limited receptive field or suffer from quadratic computational complexity. To tackle this dilemma, we propose a novel Frequency and Spatial Mutual Learning Network (FSMNet), which efficiently explores global dependencies across different modalities. Specifically, the features for each modality are extracted by the Frequency-Spatial Feature Extraction (FSFE) module, featuring a frequency branch and a spatial branch. Benefiting from the global property of the Fourier transform, the frequency branch can efficiently capture global dependency with an image-size receptive field, while the spatial branch can extract local features. To exploit complementary information from the auxiliary modality, we propose a Cross-Modal Selective fusion (CMS-fusion) module that selectively incorporate the frequency and spatial features from the auxiliary modality to enhance the corresponding branch of the target modality. To further integrate the enhanced global features from the frequency branch and the enhanced local features from the spatial branch, we develop a Frequency-Spatial fusion (FS-fusion) module, resulting in a comprehensive feature representation for the target modality. Extensive experiments on the BraTS and fastMRI datasets demonstrate that the proposed FSMNet achieves state-of-the-art performance for the MCMR task with different acceleration factors. The code is available at: https://github.com/qic999/FSMNet. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14113v1-abstract-full').style.display = 'none'; document.getElementById('2409.14113v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted as a poster by Medical Image Computing and Computer Assisted Intervention (MICCAI) 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.03977">arXiv:2409.03977</a> <span>  </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Bi-modality Images Transfer with a Discrete Process Matching Method </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zhe Xiong</a>, <a href="/search/eess?searchtype=author&query=Ding%2C+Q">Qiaoqiao Ding</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+X">Xiaoqun Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.03977v2-abstract-short" style="display: inline;"> Recently, medical image synthesis gains more and more popularity, along with the rapid development of generative models. Medical image synthesis aims to generate an unacquired image modality, often from other observed data modalities. Synthesized images can be used for clinical diagnostic assistance, data augmentation for model training and validation or image quality improving. In the meanwhile,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.03977v2-abstract-full').style.display = 'inline'; document.getElementById('2409.03977v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.03977v2-abstract-full" style="display: none;"> Recently, medical image synthesis gains more and more popularity, along with the rapid development of generative models. Medical image synthesis aims to generate an unacquired image modality, often from other observed data modalities. Synthesized images can be used for clinical diagnostic assistance, data augmentation for model training and validation or image quality improving. In the meanwhile, the flow-based models are among the successful generative models for the ability of generating realistic and high-quality synthetic images. However, most flow-based models require to calculate flow ordinary different equation (ODE) evolution steps in transfer process, for which the performances are significantly limited by heavy computation time due to a large number of time iterations. In this paper, we propose a novel flow-based model, namely Discrete Process Matching (DPM) to accomplish the bi-modality image transfer tasks. Different to other flow matching based models, we propose to utilize both forward and backward ODE flow and enhance the consistency on the intermediate images of few discrete time steps, resulting in a transfer process with much less iteration steps while maintaining high-quality generations for both modalities. Our experiments on three datasets of MRI T1/T2 and CT/MRI demonstrate that DPM outperforms other state-of-the-art flow-based methods for bi-modality image synthesis, achieving higher image quality with less computation time cost. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.03977v2-abstract-full').style.display = 'none'; document.getElementById('2409.03977v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">There are illegal data and models which is not allow for publish now. I need to withdraw the paper and revise it to other public examples</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.07050">arXiv:2408.07050</a> <span> [<a href="https://arxiv.org/pdf/2408.07050">pdf</a>, <a href="https://arxiv.org/format/2408.07050">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> PSM: Learning Probabilistic Embeddings for Multi-scale Zero-Shot Soundscape Mapping </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Khanal%2C+S">Subash Khanal</a>, <a href="/search/eess?searchtype=author&query=Xing%2C+E">Eric Xing</a>, <a href="/search/eess?searchtype=author&query=Sastry%2C+S">Srikumar Sastry</a>, <a href="/search/eess?searchtype=author&query=Dhakal%2C+A">Aayush Dhakal</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zhexiao Xiong</a>, <a href="/search/eess?searchtype=author&query=Ahmad%2C+A">Adeel Ahmad</a>, <a href="/search/eess?searchtype=author&query=Jacobs%2C+N">Nathan Jacobs</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.07050v1-abstract-short" style="display: inline;"> A soundscape is defined by the acoustic environment a person perceives at a location. In this work, we propose a framework for mapping soundscapes across the Earth. Since soundscapes involve sound distributions that span varying spatial scales, we represent locations with multi-scale satellite imagery and learn a joint representation among this imagery, audio, and text. To capture the inherent unc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.07050v1-abstract-full').style.display = 'inline'; document.getElementById('2408.07050v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.07050v1-abstract-full" style="display: none;"> A soundscape is defined by the acoustic environment a person perceives at a location. In this work, we propose a framework for mapping soundscapes across the Earth. Since soundscapes involve sound distributions that span varying spatial scales, we represent locations with multi-scale satellite imagery and learn a joint representation among this imagery, audio, and text. To capture the inherent uncertainty in the soundscape of a location, we design the representation space to be probabilistic. We also fuse ubiquitous metadata (including geolocation, time, and data source) to enable learning of spatially and temporally dynamic representations of soundscapes. We demonstrate the utility of our framework by creating large-scale soundscape maps integrating both audio and text with temporal control. To facilitate future research on this task, we also introduce a large-scale dataset, GeoSound, containing over $300k$ geotagged audio samples paired with both low- and high-resolution satellite imagery. We demonstrate that our method outperforms the existing state-of-the-art on both GeoSound and the existing SoundingEarth dataset. Our dataset and code is available at https://github.com/mvrl/PSM. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.07050v1-abstract-full').style.display = 'none'; document.getElementById('2408.07050v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at ACM MM 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.04192">arXiv:2408.04192</a> <span> [<a href="https://arxiv.org/pdf/2408.04192">pdf</a>, <a href="https://arxiv.org/ps/2408.04192">ps</a>, <a href="https://arxiv.org/format/2408.04192">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Pilot-Aided Joint Time Synchronization and Channel Estimation for OTFS </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Sun%2C+J">Jiazheng Sun</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+P">Peng Yang</a>, <a href="/search/eess?searchtype=author&query=Cao%2C+X">Xianbin Cao</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zehui Xiong</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+H">Haijun Zhang</a>, <a href="/search/eess?searchtype=author&query=Quek%2C+T+Q+S">Tony Q. S. Quek</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.04192v2-abstract-short" style="display: inline;"> This letter proposes a pilot-aided joint time synchronization and channel estimation (JTSCE) algorithm for orthogonal time frequency space (OTFS) systems. Unlike existing algorithms, JTSCE employs a maximum length sequence (MLS) rather than an isolated signal as the pilot. Distinctively, JTSCE explores MLS's autocorrelation properties to estimate timing offset and channel delay taps. After obtaini… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.04192v2-abstract-full').style.display = 'inline'; document.getElementById('2408.04192v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.04192v2-abstract-full" style="display: none;"> This letter proposes a pilot-aided joint time synchronization and channel estimation (JTSCE) algorithm for orthogonal time frequency space (OTFS) systems. Unlike existing algorithms, JTSCE employs a maximum length sequence (MLS) rather than an isolated signal as the pilot. Distinctively, JTSCE explores MLS's autocorrelation properties to estimate timing offset and channel delay taps. After obtaining delay taps, closed-form expressions of Doppler and channel gain for each propagation path are derived. Simulation results indicate that, compared to its counterpart, JTSCE achieves better bit error rate performance, close to that with perfect time synchronization and channel state information. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.04192v2-abstract-full').style.display = 'none'; document.getElementById('2408.04192v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.15395">arXiv:2407.15395</a> <span> [<a href="https://arxiv.org/pdf/2407.15395">pdf</a>, <a href="https://arxiv.org/format/2407.15395">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> FAST-GSC: Fast and Adaptive Semantic Transmission for Generative Semantic Communication </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yiru Wang</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+W">Wanting Yang</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zehui Xiong</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+Y">Yuping Zhao</a>, <a href="/search/eess?searchtype=author&query=Mao%2C+S">Shiwen Mao</a>, <a href="/search/eess?searchtype=author&query=Quek%2C+T+Q+S">Tony Q. S. Quek</a>, <a href="/search/eess?searchtype=author&query=Poor%2C+H+V">H. Vincent Poor</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.15395v1-abstract-short" style="display: inline;"> The rapidly evolving field of generative artificial intelligence technology has introduced innovative approaches for developing semantic communication (SemCom) frameworks, leading to the emergence of a new paradigm-generative SemCom (GSC). However, the complex processes involved in semantic extraction and generative inference may result in considerable latency in resource-constrained scenarios. To… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.15395v1-abstract-full').style.display = 'inline'; document.getElementById('2407.15395v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.15395v1-abstract-full" style="display: none;"> The rapidly evolving field of generative artificial intelligence technology has introduced innovative approaches for developing semantic communication (SemCom) frameworks, leading to the emergence of a new paradigm-generative SemCom (GSC). However, the complex processes involved in semantic extraction and generative inference may result in considerable latency in resource-constrained scenarios. To tackle these issues, we introduce a new GSC framework that involves fast and adaptive semantic transmission (FAST-GSC). This framework incorporates one innovative communication mechanism and two enhancement strategies at the transmitter and receiver, respectively. Aiming to reduce task latency, our communication mechanism enables fast semantic transmission by parallelizing the processes of semantic extraction at the transmitter and inference at the receiver. Preliminary evaluations indicate that while this mechanism effectively reduces task latency, it could potentially compromise task performance. To address this issue, we propose two additional methods for enhancement. First, at the transmitter, we employ reinforcement learning to discern the intrinsic temporal dependencies among the semantic units and design their extraction and transmission sequence accordingly. Second, at the receiver, we design a semantic difference calculation module and propose a sequential conditional denoising approach to alleviate the stringent immediacy requirement for the reception of semantic features. Extensive experiments demonstrate that our proposed architecture achieves a performance score comparable to the conventional GSC architecture while realizing a 52% reduction in residual task latency that extends beyond the fixed inference duration. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.15395v1-abstract-full').style.display = 'none'; document.getElementById('2407.15395v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.09935">arXiv:2407.09935</a> <span> [<a href="https://arxiv.org/pdf/2407.09935">pdf</a>, <a href="https://arxiv.org/format/2407.09935">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> LeRF: Learning Resampling Function for Adaptive and Efficient Image Interpolation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+J">Jiacheng Li</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+C">Chang Chen</a>, <a href="/search/eess?searchtype=author&query=Song%2C+F">Fenglong Song</a>, <a href="/search/eess?searchtype=author&query=Yan%2C+Y">Youliang Yan</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zhiwei Xiong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.09935v1-abstract-short" style="display: inline;"> Image resampling is a basic technique that is widely employed in daily applications, such as camera photo editing. Recent deep neural networks (DNNs) have made impressive progress in performance by introducing learned data priors. Still, these methods are not the perfect substitute for interpolation, due to the drawbacks in efficiency and versatility. In this work, we propose a novel method of Lea… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.09935v1-abstract-full').style.display = 'inline'; document.getElementById('2407.09935v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.09935v1-abstract-full" style="display: none;"> Image resampling is a basic technique that is widely employed in daily applications, such as camera photo editing. Recent deep neural networks (DNNs) have made impressive progress in performance by introducing learned data priors. Still, these methods are not the perfect substitute for interpolation, due to the drawbacks in efficiency and versatility. In this work, we propose a novel method of Learning Resampling Function (termed LeRF), which takes advantage of both the structural priors learned by DNNs and the locally continuous assumption of interpolation. Specifically, LeRF assigns spatially varying resampling functions to input image pixels and learns to predict the hyper-parameters that determine the shapes of these resampling functions with a neural network. Based on the formulation of LeRF, we develop a family of models, including both efficiency-orientated and performance-orientated ones. To achieve interpolation-level efficiency, we adopt look-up tables (LUTs) to accelerate the inference of the learned neural network. Furthermore, we design a directional ensemble strategy and edge-sensitive indexing patterns to better capture local structures. On the other hand, to obtain DNN-level performance, we propose an extension of LeRF to enable it in cooperation with pre-trained upsampling models for cascaded resampling. Extensive experiments show that the efficiency-orientated version of LeRF runs as fast as interpolation, generalizes well to arbitrary transformations, and outperforms interpolation significantly, e.g., up to 3dB PSNR gain over Bicubic for x2 upsampling on Manga109. Besides, the performance-orientated version of LeRF reaches comparable performance with existing DNNs at much higher efficiency, e.g., less than 25% running time on a desktop GPU. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.09935v1-abstract-full').style.display = 'none'; document.getElementById('2407.09935v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Code: https://github.com/ddlee-cn/LeRF-PyTorch</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.16083">arXiv:2406.16083</a> <span> [<a href="https://arxiv.org/pdf/2406.16083">pdf</a>, <a href="https://arxiv.org/format/2406.16083">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Mamba-based Light Field Super-Resolution with Efficient Subspace Scanning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Gao%2C+R">Ruisheng Gao</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+Z">Zeyu Xiao</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zhiwei Xiong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.16083v1-abstract-short" style="display: inline;"> Transformer-based methods have demonstrated impressive performance in 4D light field (LF) super-resolution by effectively modeling long-range spatial-angular correlations, but their quadratic complexity hinders the efficient processing of high resolution 4D inputs, resulting in slow inference speed and high memory cost. As a compromise, most prior work adopts a patch-based strategy, which fails to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.16083v1-abstract-full').style.display = 'inline'; document.getElementById('2406.16083v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.16083v1-abstract-full" style="display: none;"> Transformer-based methods have demonstrated impressive performance in 4D light field (LF) super-resolution by effectively modeling long-range spatial-angular correlations, but their quadratic complexity hinders the efficient processing of high resolution 4D inputs, resulting in slow inference speed and high memory cost. As a compromise, most prior work adopts a patch-based strategy, which fails to leverage the full information from the entire input LFs. The recently proposed selective state-space model, Mamba, has gained popularity for its efficient long-range sequence modeling. In this paper, we propose a Mamba-based Light Field Super-Resolution method, named MLFSR, by designing an efficient subspace scanning strategy. Specifically, we tokenize 4D LFs into subspace sequences and conduct bi-directional scanning on each subspace. Based on our scanning strategy, we then design the Mamba-based Global Interaction (MGI) module to capture global information and the local Spatial- Angular Modulator (SAM) to complement local details. Additionally, we introduce a Transformer-to-Mamba (T2M) loss to further enhance overall performance. Extensive experiments on public benchmarks demonstrate that MLFSR surpasses CNN-based models and rivals Transformer-based methods in performance while maintaining higher efficiency. With quicker inference speed and reduced memory demand, MLFSR facilitates full-image processing of high-resolution 4D LFs with enhanced performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.16083v1-abstract-full').style.display = 'none'; document.getElementById('2406.16083v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages,7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.12300">arXiv:2406.12300</a> <span> [<a href="https://arxiv.org/pdf/2406.12300">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> </div> </div> <p class="title is-5 mathjax"> IR2QSM: Quantitative Susceptibility Mapping via Deep Neural Networks with Iterative Reverse Concatenations and Recurrent Modules </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+M">Min Li</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+C">Chen Chen</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zhuang Xiong</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Y">Ying Liu</a>, <a href="/search/eess?searchtype=author&query=Rong%2C+P">Pengfei Rong</a>, <a href="/search/eess?searchtype=author&query=Shan%2C+S">Shanshan Shan</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+F">Feng Liu</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+H">Hongfu Sun</a>, <a href="/search/eess?searchtype=author&query=Gao%2C+Y">Yang Gao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.12300v1-abstract-short" style="display: inline;"> Quantitative susceptibility mapping (QSM) is an MRI phase-based post-processing technique to extract the distribution of tissue susceptibilities, demonstrating significant potential in studying neurological diseases. However, the ill-conditioned nature of dipole inversion makes QSM reconstruction from the tissue field prone to noise and artifacts. In this work, we propose a novel deep learning-bas… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.12300v1-abstract-full').style.display = 'inline'; document.getElementById('2406.12300v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.12300v1-abstract-full" style="display: none;"> Quantitative susceptibility mapping (QSM) is an MRI phase-based post-processing technique to extract the distribution of tissue susceptibilities, demonstrating significant potential in studying neurological diseases. However, the ill-conditioned nature of dipole inversion makes QSM reconstruction from the tissue field prone to noise and artifacts. In this work, we propose a novel deep learning-based IR2QSM method for QSM reconstruction. It is designed by iterating four times of a reverse concatenations and middle recurrent modules enhanced U-net, which could dramatically improve the efficiency of latent feature utilization. Simulated and in vivo experiments were conducted to compare IR2QSM with several traditional algorithms (MEDI and iLSQR) and state-of-the-art deep learning methods (U-net, xQSM, and LPCNN). The results indicated that IR2QSM was able to obtain QSM images with significantly increased accuracy and mitigated artifacts over other methods. Particularly, IR2QSM demonstrated on average the best NRMSE (27.59%) in simulated experiments, which is 15.48%, 7.86%, 17.24%, 9.26%, and 29.13% lower than iLSQR, MEDI, U-net, xQSM, LPCNN, respectively, and led to improved QSM results with fewer artifacts for the in vivo data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.12300v1-abstract-full').style.display = 'none'; document.getElementById('2406.12300v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 9 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.04111">arXiv:2406.04111</a> <span> [<a href="https://arxiv.org/pdf/2406.04111">pdf</a>, <a href="https://arxiv.org/format/2406.04111">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> UrbanSARFloods: Sentinel-1 SLC-Based Benchmark Dataset for Urban and Open-Area Flood Mapping </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhao%2C+J">Jie Zhao</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zhitong Xiong</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+X+X">Xiao Xiang Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.04111v1-abstract-short" style="display: inline;"> Due to its cloud-penetrating capability and independence from solar illumination, satellite Synthetic Aperture Radar (SAR) is the preferred data source for large-scale flood mapping, providing global coverage and including various land cover classes. However, most studies on large-scale SAR-derived flood mapping using deep learning algorithms have primarily focused on flooded open areas, utilizing… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.04111v1-abstract-full').style.display = 'inline'; document.getElementById('2406.04111v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.04111v1-abstract-full" style="display: none;"> Due to its cloud-penetrating capability and independence from solar illumination, satellite Synthetic Aperture Radar (SAR) is the preferred data source for large-scale flood mapping, providing global coverage and including various land cover classes. However, most studies on large-scale SAR-derived flood mapping using deep learning algorithms have primarily focused on flooded open areas, utilizing available open-access datasets (e.g., Sen1Floods11) and with limited attention to urban floods. To address this gap, we introduce \textbf{UrbanSARFloods}, a floodwater dataset featuring pre-processed Sentinel-1 intensity data and interferometric coherence imagery acquired before and during flood events. It contains 8,879 $512\times 512$ chips covering 807,500 $km^2$ across 20 land cover classes and 5 continents, spanning 18 flood events. We used UrbanSARFloods to benchmark existing state-of-the-art convolutional neural networks (CNNs) for segmenting open and urban flood areas. Our findings indicate that prevalent approaches, including the Weighted Cross-Entropy (WCE) loss and the application of transfer learning with pretrained models, fall short in overcoming the obstacles posed by imbalanced data and the constraints of a small training dataset. Urban flood detection remains challenging. Future research should explore strategies for addressing imbalanced data challenges and investigate transfer learning's potential for SAR-based large-scale flood mapping. Besides, expanding this dataset to include additional flood events holds promise for enhancing its utility and contributing to advancements in flood mapping techniques. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.04111v1-abstract-full').style.display = 'none'; document.getElementById('2406.04111v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by CVPR 2024 EarthVision Workshop</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.16850">arXiv:2405.16850</a> <span> [<a href="https://arxiv.org/pdf/2405.16850">pdf</a>, <a href="https://arxiv.org/format/2405.16850">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> UniCompress: Enhancing Multi-Data Medical Image Compression with Knowledge Distillation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Yang%2C+R">Runzhao Yang</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yinda Chen</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Z">Zhihong Zhang</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+X">Xiaoyu Liu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Z">Zongren Li</a>, <a href="/search/eess?searchtype=author&query=He%2C+K">Kunlun He</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zhiwei Xiong</a>, <a href="/search/eess?searchtype=author&query=Suo%2C+J">Jinli Suo</a>, <a href="/search/eess?searchtype=author&query=Dai%2C+Q">Qionghai Dai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.16850v1-abstract-short" style="display: inline;"> In the field of medical image compression, Implicit Neural Representation (INR) networks have shown remarkable versatility due to their flexible compression ratios, yet they are constrained by a one-to-one fitting approach that results in lengthy encoding times. Our novel method, ``\textbf{UniCompress}'', innovatively extends the compression capabilities of INR by being the first to compress multi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.16850v1-abstract-full').style.display = 'inline'; document.getElementById('2405.16850v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.16850v1-abstract-full" style="display: none;"> In the field of medical image compression, Implicit Neural Representation (INR) networks have shown remarkable versatility due to their flexible compression ratios, yet they are constrained by a one-to-one fitting approach that results in lengthy encoding times. Our novel method, ``\textbf{UniCompress}'', innovatively extends the compression capabilities of INR by being the first to compress multiple medical data blocks using a single INR network. By employing wavelet transforms and quantization, we introduce a codebook containing frequency domain information as a prior input to the INR network. This enhances the representational power of INR and provides distinctive conditioning for different image blocks. Furthermore, our research introduces a new technique for the knowledge distillation of implicit representations, simplifying complex model knowledge into more manageable formats to improve compression ratios. Extensive testing on CT and electron microscopy (EM) datasets has demonstrated that UniCompress outperforms traditional INR methods and commercial compression solutions like HEVC, especially in complex and high compression scenarios. Notably, compared to existing INR techniques, UniCompress achieves a 4$\sim$5 times increase in compression speed, marking a significant advancement in the field of medical image compression. Codes will be publicly available. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.16850v1-abstract-full').style.display = 'none'; document.getElementById('2405.16850v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.04285">arXiv:2405.04285</a> <span> [<a href="https://arxiv.org/pdf/2405.04285">pdf</a>, <a href="https://arxiv.org/format/2405.04285">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> On the Foundations of Earth and Climate Foundation Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhu%2C+X+X">Xiao Xiang Zhu</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zhitong Xiong</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yi Wang</a>, <a href="/search/eess?searchtype=author&query=Stewart%2C+A+J">Adam J. Stewart</a>, <a href="/search/eess?searchtype=author&query=Heidler%2C+K">Konrad Heidler</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yuanyuan Wang</a>, <a href="/search/eess?searchtype=author&query=Yuan%2C+Z">Zhenghang Yuan</a>, <a href="/search/eess?searchtype=author&query=Dujardin%2C+T">Thomas Dujardin</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+Q">Qingsong Xu</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+Y">Yilei Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.04285v1-abstract-short" style="display: inline;"> Foundation models have enormous potential in advancing Earth and climate sciences, however, current approaches may not be optimal as they focus on a few basic features of a desirable Earth and climate foundation model. Crafting the ideal Earth foundation model, we define eleven features which would allow such a foundation model to be beneficial for any geoscientific downstream application in an en… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.04285v1-abstract-full').style.display = 'inline'; document.getElementById('2405.04285v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.04285v1-abstract-full" style="display: none;"> Foundation models have enormous potential in advancing Earth and climate sciences, however, current approaches may not be optimal as they focus on a few basic features of a desirable Earth and climate foundation model. Crafting the ideal Earth foundation model, we define eleven features which would allow such a foundation model to be beneficial for any geoscientific downstream application in an environmental- and human-centric manner.We further shed light on the way forward to achieve the ideal model and to evaluate Earth foundation models. What comes after foundation models? Energy efficient adaptation, adversarial defenses, and interpretability are among the emerging directions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.04285v1-abstract-full').style.display = 'none'; document.getElementById('2405.04285v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.04012">arXiv:2405.04012</a> <span> [<a href="https://arxiv.org/pdf/2405.04012">pdf</a>, <a href="https://arxiv.org/format/2405.04012">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Latency and Energy Minimization in NOMA-Assisted MEC Network: A Federated Deep Reinforcement Learning Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Ahmadi%2C+A">Arian Ahmadi</a>, <a href="/search/eess?searchtype=author&query=H%C3%B8st-Madsen%2C+A">Anders H酶st-Madsen</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zixiang Xiong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.04012v1-abstract-short" style="display: inline;"> Multi-access edge computing (MEC) is seen as a vital component of forthcoming 6G wireless networks, aiming to support emerging applications that demand high service reliability and low latency. However, ensuring the ultra-reliable and low-latency performance of MEC networks poses a significant challenge due to uncertainties associated with wireless links, constraints imposed by communication and c… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.04012v1-abstract-full').style.display = 'inline'; document.getElementById('2405.04012v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.04012v1-abstract-full" style="display: none;"> Multi-access edge computing (MEC) is seen as a vital component of forthcoming 6G wireless networks, aiming to support emerging applications that demand high service reliability and low latency. However, ensuring the ultra-reliable and low-latency performance of MEC networks poses a significant challenge due to uncertainties associated with wireless links, constraints imposed by communication and computing resources, and the dynamic nature of network traffic. Enabling ultra-reliable and low-latency MEC mandates efficient load balancing jointly with resource allocation. In this paper, we investigate the joint optimization problem of offloading decisions, computation and communication resource allocation to minimize the expected weighted sum of delivery latency and energy consumption in a non-orthogonal multiple access (NOMA)-assisted MEC network. Given the formulated problem is a mixed-integer non-linear programming (MINLP), a new multi-agent federated deep reinforcement learning (FDRL) solution based on double deep Q-network (DDQN) is developed to efficiently optimize the offloading strategies across the MEC network while accelerating the learning process of the Internet-of-Thing (IoT) devices. Simulation results show that the proposed FDRL scheme can effectively reduce the weighted sum of delivery latency and energy consumption of IoT devices in the MEC network and outperform the baseline approaches. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.04012v1-abstract-full').style.display = 'none'; document.getElementById('2405.04012v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.19415">arXiv:2404.19415</a> <span> [<a href="https://arxiv.org/pdf/2404.19415">pdf</a>, <a href="https://arxiv.org/format/2404.19415">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> Two-Stage Robust Planning Model for Park-Level Integrated Energy System Considering Uncertain Equipment Contingency </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zuxun Xiong</a>, <a href="/search/eess?searchtype=author&query=Shen%2C+X">Xinwei Shen</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+H">Hongbin Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.19415v2-abstract-short" style="display: inline;"> To enhance the reliability of Integrated Energy Systems (IESs) and address the research gap in reliability-based planning methods, this paper proposes a two-stage robust planning model specifically for park-level IESs. The proposed planning model considers uncertainties like load demand fluctuations and equipment contingencies, and provides a reliable scheme of equipment selection and sizing for I… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.19415v2-abstract-full').style.display = 'inline'; document.getElementById('2404.19415v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.19415v2-abstract-full" style="display: none;"> To enhance the reliability of Integrated Energy Systems (IESs) and address the research gap in reliability-based planning methods, this paper proposes a two-stage robust planning model specifically for park-level IESs. The proposed planning model considers uncertainties like load demand fluctuations and equipment contingencies, and provides a reliable scheme of equipment selection and sizing for IES investors. Inspired by the unit commitment problem, we formulate an equipment contingency uncertainty set to accurately describe the potential equipment contingencies which happen and can be repaired within a day. Then, a modified nested column-and-constraint generation algorithm is applied to solve this two-stage robust planning model with integer recourse efficiently. In the case study, the role of energy storage system for IES reliability enhancement is analyzed in detail. Computational results demonstrate the advantage of the proposed model over other planning models in terms of improving reliability. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.19415v2-abstract-full').style.display = 'none'; document.getElementById('2404.19415v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.14140">arXiv:2404.14140</a> <span> [<a href="https://arxiv.org/pdf/2404.14140">pdf</a>, <a href="https://arxiv.org/format/2404.14140">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Generative Artificial Intelligence Assisted Wireless Sensing: Human Flow Detection in Practical Communication Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/eess?searchtype=author&query=Du%2C+H">Hongyang Du</a>, <a href="/search/eess?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zehui Xiong</a>, <a href="/search/eess?searchtype=author&query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/eess?searchtype=author&query=Ai%2C+B">Bo Ai</a>, <a href="/search/eess?searchtype=author&query=Han%2C+Z">Zhu Han</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+D+I">Dong In Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.14140v1-abstract-short" style="display: inline;"> Groundbreaking applications such as ChatGPT have heightened research interest in generative artificial intelligence (GAI). Essentially, GAI excels not only in content generation but also in signal processing, offering support for wireless sensing. Hence, we introduce a novel GAI-assisted human flow detection system (G-HFD). Rigorously, G-HFD first uses channel state information (CSI) to estimate t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.14140v1-abstract-full').style.display = 'inline'; document.getElementById('2404.14140v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.14140v1-abstract-full" style="display: none;"> Groundbreaking applications such as ChatGPT have heightened research interest in generative artificial intelligence (GAI). Essentially, GAI excels not only in content generation but also in signal processing, offering support for wireless sensing. Hence, we introduce a novel GAI-assisted human flow detection system (G-HFD). Rigorously, G-HFD first uses channel state information (CSI) to estimate the velocity and acceleration of propagation path length change of the human-induced reflection (HIR). Then, given the strong inference ability of the diffusion model, we propose a unified weighted conditional diffusion model (UW-CDM) to denoise the estimation results, enabling the detection of the number of targets. Next, we use the CSI obtained by a uniform linear array with wavelength spacing to estimate the HIR's time of flight and direction of arrival (DoA). In this process, UW-CDM solves the problem of ambiguous DoA spectrum, ensuring accurate DoA estimation. Finally, through clustering, G-HFD determines the number of subflows and the number of targets in each subflow, i.e., the subflow size. The evaluation based on practical downlink communication signals shows G-HFD's accuracy of subflow size detection can reach 91%. This validates its effectiveness and underscores the significant potential of GAI in the context of wireless sensing. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.14140v1-abstract-full').style.display = 'none'; document.getElementById('2404.14140v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.06765">arXiv:2404.06765</a> <span> [<a href="https://arxiv.org/pdf/2404.06765">pdf</a>, <a href="https://arxiv.org/format/2404.06765">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Harnessing the Power of AI-Generated Content for Semantic Communication </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yiru Wang</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+W">Wanting Yang</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zehui Xiong</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+Y">Yuping Zhao</a>, <a href="/search/eess?searchtype=author&query=Quek%2C+T+Q+S">Tony Q. S. Quek</a>, <a href="/search/eess?searchtype=author&query=Han%2C+Z">Zhu Han</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.06765v1-abstract-short" style="display: inline;"> Semantic Communication (SemCom) is envisaged as the next-generation paradigm to address challenges stemming from the conflicts between the increasing volume of transmission data and the scarcity of spectrum resources. However, existing SemCom systems face drawbacks, such as low explainability, modality rigidity, and inadequate reconstruction functionality. Recognizing the transformative capabiliti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.06765v1-abstract-full').style.display = 'inline'; document.getElementById('2404.06765v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.06765v1-abstract-full" style="display: none;"> Semantic Communication (SemCom) is envisaged as the next-generation paradigm to address challenges stemming from the conflicts between the increasing volume of transmission data and the scarcity of spectrum resources. However, existing SemCom systems face drawbacks, such as low explainability, modality rigidity, and inadequate reconstruction functionality. Recognizing the transformative capabilities of AI-generated content (AIGC) technologies in content generation, this paper explores a pioneering approach by integrating them into SemCom to address the aforementioned challenges. We employ a three-layer model to illustrate the proposed AIGC-assisted SemCom (AIGC-SCM) architecture, emphasizing its clear deviation from existing SemCom. Grounded in this model, we investigate various AIGC technologies with the potential to augment SemCom's performance. In alignment with SemCom's goal of conveying semantic meanings, we also introduce the new evaluation methods for our AIGC-SCM system. Subsequently, we explore communication scenarios where our proposed AIGC-SCM can realize its potential. For practical implementation, we construct a detailed integration workflow and conduct a case study in a virtual reality image transmission scenario. The results demonstrate our ability to maintain a high degree of alignment between the reconstructed content and the original source information, while substantially minimizing the data volume required for transmission. These findings pave the way for further enhancements in communication efficiency and the improvement of Quality of Service. At last, we present future directions for AIGC-SCM studies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.06765v1-abstract-full').style.display = 'none'; document.getElementById('2404.06765v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.14070">arXiv:2403.14070</a> <span> [<a href="https://arxiv.org/pdf/2403.14070">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Medical Physics">physics.med-ph</span> </div> </div> <p class="title is-5 mathjax"> QSMDiff: Unsupervised 3D Diffusion Models for Quantitative Susceptibility Mapping </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zhuang Xiong</a>, <a href="/search/eess?searchtype=author&query=Jiang%2C+W">Wei Jiang</a>, <a href="/search/eess?searchtype=author&query=Gao%2C+Y">Yang Gao</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+F">Feng Liu</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+H">Hongfu Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.14070v1-abstract-short" style="display: inline;"> Quantitative Susceptibility Mapping (QSM) dipole inversion is an ill-posed inverse problem for quantifying magnetic susceptibility distributions from MRI tissue phases. While supervised deep learning methods have shown success in specific QSM tasks, their generalizability across different acquisition scenarios remains constrained. Recent developments in diffusion models have demonstrated potential… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.14070v1-abstract-full').style.display = 'inline'; document.getElementById('2403.14070v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.14070v1-abstract-full" style="display: none;"> Quantitative Susceptibility Mapping (QSM) dipole inversion is an ill-posed inverse problem for quantifying magnetic susceptibility distributions from MRI tissue phases. While supervised deep learning methods have shown success in specific QSM tasks, their generalizability across different acquisition scenarios remains constrained. Recent developments in diffusion models have demonstrated potential for solving 2D medical imaging inverse problems. However, their application to 3D modalities, such as QSM, remains challenging due to high computational demands. In this work, we developed a 3D image patch-based diffusion model, namely QSMDiff, for robust QSM reconstruction across different scan parameters, alongside simultaneous super-resolution and image-denoising tasks. QSMDiff adopts unsupervised 3D image patch training and full-size measurement guidance during inference for controlled image generation. Evaluation on simulated and in-vivo human brains, using gradient-echo and echo-planar imaging sequences across different acquisition parameters, demonstrates superior performance. The method proposed in QSMDiff also holds promise for impacting other 3D medical imaging applications beyond QSM. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.14070v1-abstract-full').style.display = 'none'; document.getElementById('2403.14070v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.05826">arXiv:2403.05826</a> <span> [<a href="https://arxiv.org/pdf/2403.05826">pdf</a>, <a href="https://arxiv.org/format/2403.05826">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Cached Model-as-a-Resource: Provisioning Large Language Model Agents for Edge Intelligence in Space-air-ground Integrated Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xu%2C+M">Minrui Xu</a>, <a href="/search/eess?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+H">Hongliang Zhang</a>, <a href="/search/eess?searchtype=author&query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zehui Xiong</a>, <a href="/search/eess?searchtype=author&query=Mao%2C+S">Shiwen Mao</a>, <a href="/search/eess?searchtype=author&query=Han%2C+Z">Zhu Han</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.05826v2-abstract-short" style="display: inline;"> Edge intelligence in space-air-ground integrated networks (SAGINs) can enable worldwide network coverage beyond geographical limitations for users to access ubiquitous and low-latency intelligence services. Facing global coverage and complex environments in SAGINs, edge intelligence can provision approximate large language models (LLMs) agents for users via edge servers at ground base stations (BS… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.05826v2-abstract-full').style.display = 'inline'; document.getElementById('2403.05826v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.05826v2-abstract-full" style="display: none;"> Edge intelligence in space-air-ground integrated networks (SAGINs) can enable worldwide network coverage beyond geographical limitations for users to access ubiquitous and low-latency intelligence services. Facing global coverage and complex environments in SAGINs, edge intelligence can provision approximate large language models (LLMs) agents for users via edge servers at ground base stations (BSs) or cloud data centers relayed by satellites. As LLMs with billions of parameters are pre-trained on vast datasets, LLM agents have few-shot learning capabilities, e.g., chain-of-thought (CoT) prompting for complex tasks, which raises a new trade-off between resource consumption and performance in SAGINs. In this paper, we propose a joint caching and inference framework for edge intelligence to provision sustainable and ubiquitous LLM agents in SAGINs. We introduce "cached model-as-a-resource" for offering LLMs with limited context windows and propose a novel optimization framework, i.e., joint model caching and inference, to utilize cached model resources for provisioning LLM agent services along with communication, computing, and storage resources. We design "age of thought" (AoT) considering the CoT prompting of LLMs, and propose a least AoT cached model replacement algorithm for optimizing the provisioning cost. We propose a deep Q-network-based modified second-bid (DQMSB) auction to incentivize network operators, which can enhance allocation efficiency by 23% while guaranteeing strategy-proofness and free from adverse selection. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.05826v2-abstract-full').style.display = 'none'; document.getElementById('2403.05826v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.19470">arXiv:2402.19470</a> <span> [<a href="https://arxiv.org/pdf/2402.19470">pdf</a>, <a href="https://arxiv.org/format/2402.19470">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Towards Generalizable Tumor Synthesis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+Q">Qi Chen</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+X">Xiaoxi Chen</a>, <a href="/search/eess?searchtype=author&query=Song%2C+H">Haorui Song</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zhiwei Xiong</a>, <a href="/search/eess?searchtype=author&query=Yuille%2C+A">Alan Yuille</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+C">Chen Wei</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+Z">Zongwei Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.19470v2-abstract-short" style="display: inline;"> Tumor synthesis enables the creation of artificial tumors in medical images, facilitating the training of AI models for tumor detection and segmentation. However, success in tumor synthesis hinges on creating visually realistic tumors that are generalizable across multiple organs and, furthermore, the resulting AI models being capable of detecting real tumors in images sourced from different domai… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.19470v2-abstract-full').style.display = 'inline'; document.getElementById('2402.19470v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.19470v2-abstract-full" style="display: none;"> Tumor synthesis enables the creation of artificial tumors in medical images, facilitating the training of AI models for tumor detection and segmentation. However, success in tumor synthesis hinges on creating visually realistic tumors that are generalizable across multiple organs and, furthermore, the resulting AI models being capable of detecting real tumors in images sourced from different domains (e.g., hospitals). This paper made a progressive stride toward generalizable tumor synthesis by leveraging a critical observation: early-stage tumors (< 2cm) tend to have similar imaging characteristics in computed tomography (CT), whether they originate in the liver, pancreas, or kidneys. We have ascertained that generative AI models, e.g., Diffusion Models, can create realistic tumors generalized to a range of organs even when trained on a limited number of tumor examples from only one organ. Moreover, we have shown that AI models trained on these synthetic tumors can be generalized to detect and segment real tumors from CT volumes, encompassing a broad spectrum of patient demographics, imaging protocols, and healthcare facilities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.19470v2-abstract-full').style.display = 'none'; document.getElementById('2402.19470v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The IEEE / CVF Computer Vision and Pattern Recognition Conference (CVPR 2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.09756">arXiv:2402.09756</a> <span> [<a href="https://arxiv.org/pdf/2402.09756">pdf</a>, <a href="https://arxiv.org/format/2402.09756">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Mixture of Experts for Network Optimization: A Large Language Model-enabled Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Du%2C+H">Hongyang Du</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+G">Guangyuan Liu</a>, <a href="/search/eess?searchtype=author&query=Lin%2C+Y">Yijing Lin</a>, <a href="/search/eess?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/eess?searchtype=author&query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zehui Xiong</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+D+I">Dong In Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.09756v1-abstract-short" style="display: inline;"> Optimizing various wireless user tasks poses a significant challenge for networking systems because of the expanding range of user requirements. Despite advancements in Deep Reinforcement Learning (DRL), the need for customized optimization tasks for individual users complicates developing and applying numerous DRL models, leading to substantial computation resource and energy consumption and can… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09756v1-abstract-full').style.display = 'inline'; document.getElementById('2402.09756v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.09756v1-abstract-full" style="display: none;"> Optimizing various wireless user tasks poses a significant challenge for networking systems because of the expanding range of user requirements. Despite advancements in Deep Reinforcement Learning (DRL), the need for customized optimization tasks for individual users complicates developing and applying numerous DRL models, leading to substantial computation resource and energy consumption and can lead to inconsistent outcomes. To address this issue, we propose a novel approach utilizing a Mixture of Experts (MoE) framework, augmented with Large Language Models (LLMs), to analyze user objectives and constraints effectively, select specialized DRL experts, and weigh each decision from the participating experts. Specifically, we develop a gate network to oversee the expert models, allowing a collective of experts to tackle a wide array of new tasks. Furthermore, we innovatively substitute the traditional gate network with an LLM, leveraging its advanced reasoning capabilities to manage expert model selection for joint decisions. Our proposed method reduces the need to train new DRL models for each unique optimization problem, decreasing energy consumption and AI model implementation costs. The LLM-enabled MoE approach is validated through a general maze navigation task and a specific network service provider utility maximization task, demonstrating its effectiveness and practical applicability in optimizing complex networking systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09756v1-abstract-full').style.display = 'none'; document.getElementById('2402.09756v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.02411">arXiv:2402.02411</a> <span> [<a href="https://arxiv.org/pdf/2402.02411">pdf</a>, <a href="https://arxiv.org/format/2402.02411">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Physics-Inspired Degradation Models for Hyperspectral Image Fusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Lian%2C+J">Jie Lian</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+L">Lizhi Wang</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+L">Lin Zhu</a>, <a href="/search/eess?searchtype=author&query=Dian%2C+R">Renwei Dian</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zhiwei Xiong</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+H">Hua Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.02411v1-abstract-short" style="display: inline;"> The fusion of a low-spatial-resolution hyperspectral image (LR-HSI) with a high-spatial-resolution multispectral image (HR-MSI) has garnered increasing research interest. However, most fusion methods solely focus on the fusion algorithm itself and overlook the degradation models, which results in unsatisfactory performance in practical scenarios. To fill this gap, we propose physics-inspired degra… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.02411v1-abstract-full').style.display = 'inline'; document.getElementById('2402.02411v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.02411v1-abstract-full" style="display: none;"> The fusion of a low-spatial-resolution hyperspectral image (LR-HSI) with a high-spatial-resolution multispectral image (HR-MSI) has garnered increasing research interest. However, most fusion methods solely focus on the fusion algorithm itself and overlook the degradation models, which results in unsatisfactory performance in practical scenarios. To fill this gap, we propose physics-inspired degradation models (PIDM) to model the degradation of LR-HSI and HR-MSI, which comprises a spatial degradation network (SpaDN) and a spectral degradation network (SpeDN). SpaDN and SpeDN are designed based on two insights. First, we employ spatial warping and spectral modulation operations to simulate lens aberrations, thereby introducing non-uniformity into the spatial and spectral degradation processes. Second, we utilize asymmetric downsampling and parallel downsampling operations to separately reduce the spatial and spectral resolutions of the images, thus ensuring the matching of spatial and spectral degradation processes with specific physical characteristics. Once SpaDN and SpeDN are established, we adopt a self-supervised training strategy to optimize the network parameters and provide a plug-and-play solution for fusion methods. Comprehensive experiments demonstrate that our proposed PIDM can boost the fusion performance of existing fusion methods in practical scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.02411v1-abstract-full').style.display = 'none'; document.getElementById('2402.02411v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.07120">arXiv:2401.07120</a> <span> [<a href="https://arxiv.org/pdf/2401.07120">pdf</a>, <a href="https://arxiv.org/format/2401.07120">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> </div> </div> <p class="title is-5 mathjax"> Generative AI-enabled Quantum Computing Networks and Intelligent Resource Allocation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xu%2C+M">Minrui Xu</a>, <a href="/search/eess?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/eess?searchtype=author&query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zehui Xiong</a>, <a href="/search/eess?searchtype=author&query=Cao%2C+Y">Yuan Cao</a>, <a href="/search/eess?searchtype=author&query=Gao%2C+Y">Yulan Gao</a>, <a href="/search/eess?searchtype=author&query=Ren%2C+C">Chao Ren</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+H">Han Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.07120v1-abstract-short" style="display: inline;"> Quantum computing networks enable scalable collaboration and secure information exchange among multiple classical and quantum computing nodes while executing large-scale generative AI computation tasks and advanced quantum algorithms. Quantum computing networks overcome limitations such as the number of qubits and coherence time of entangled pairs and offer advantages for generative AI infrastruct… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.07120v1-abstract-full').style.display = 'inline'; document.getElementById('2401.07120v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.07120v1-abstract-full" style="display: none;"> Quantum computing networks enable scalable collaboration and secure information exchange among multiple classical and quantum computing nodes while executing large-scale generative AI computation tasks and advanced quantum algorithms. Quantum computing networks overcome limitations such as the number of qubits and coherence time of entangled pairs and offer advantages for generative AI infrastructure, including enhanced noise reduction through distributed processing and improved scalability by connecting multiple quantum devices. However, efficient resource allocation in quantum computing networks is a critical challenge due to factors including qubit variability and network complexity. In this article, we propose an intelligent resource allocation framework for quantum computing networks to improve network scalability with minimized resource costs. To achieve scalability in quantum computing networks, we formulate the resource allocation problem as stochastic programming, accounting for the uncertain fidelities of qubits and entangled pairs. Furthermore, we introduce state-of-the-art reinforcement learning (RL) algorithms, from generative learning to quantum machine learning for optimal quantum resource allocation to resolve the proposed stochastic resource allocation problem efficiently. Finally, we optimize the resource allocation in heterogeneous quantum computing networks supporting quantum generative learning applications and propose a multi-agent RL-based algorithm to learn the optimal resource allocation policies without prior knowledge. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.07120v1-abstract-full').style.display = 'none'; document.getElementById('2401.07120v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.12078">arXiv:2311.12078</a> <span> [<a href="https://arxiv.org/pdf/2311.12078">pdf</a>, <a href="https://arxiv.org/format/2311.12078">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Fast Controllable Diffusion Models for Undersampled MRI Reconstruction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Jiang%2C+W">Wei Jiang</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zhuang Xiong</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+F">Feng Liu</a>, <a href="/search/eess?searchtype=author&query=Ye%2C+N">Nan Ye</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+H">Hongfu Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.12078v3-abstract-short" style="display: inline;"> Supervised deep learning methods have shown promise in undersampled Magnetic Resonance Imaging (MRI) reconstruction, but their requirement for paired data limits their generalizability to the diverse MRI acquisition parameters. Recently, unsupervised controllable generative diffusion models have been applied to undersampled MRI reconstruction, without paired data or model retraining for different… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.12078v3-abstract-full').style.display = 'inline'; document.getElementById('2311.12078v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.12078v3-abstract-full" style="display: none;"> Supervised deep learning methods have shown promise in undersampled Magnetic Resonance Imaging (MRI) reconstruction, but their requirement for paired data limits their generalizability to the diverse MRI acquisition parameters. Recently, unsupervised controllable generative diffusion models have been applied to undersampled MRI reconstruction, without paired data or model retraining for different MRI acquisitions. However, diffusion models are generally slow in sampling and state-of-the-art acceleration techniques can lead to sub-optimal results when directly applied to the controllable generation process. This study introduces a new algorithm called Predictor-Projector-Noisor (PPN), which enhances and accelerates controllable generation of diffusion models for undersampled MRI reconstruction. Our results demonstrate that PPN produces high-fidelity MR images that conform to undersampled k-space measurements with significantly shorter reconstruction time than other controllable sampling methods. In addition, the unsupervised PPN accelerated diffusion models are adaptable to different MRI acquisition parameters, making them more practical for clinical use than supervised learning techniques. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.12078v3-abstract-full').style.display = 'none'; document.getElementById('2311.12078v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.07823">arXiv:2311.07823</a> <span> [<a href="https://arxiv.org/pdf/2311.07823">pdf</a>, <a href="https://arxiv.org/format/2311.07823">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.media.2024.103160">10.1016/j.media.2024.103160 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Plug-and-Play Latent Feature Editing for Orientation-Adaptive Quantitative Susceptibility Mapping Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Gao%2C+Y">Yang Gao</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zhuang Xiong</a>, <a href="/search/eess?searchtype=author&query=Shan%2C+S">Shanshan Shan</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Y">Yin Liu</a>, <a href="/search/eess?searchtype=author&query=Rong%2C+P">Pengfei Rong</a>, <a href="/search/eess?searchtype=author&query=Li%2C+M">Min Li</a>, <a href="/search/eess?searchtype=author&query=Wilman%2C+A+H">Alan H Wilman</a>, <a href="/search/eess?searchtype=author&query=Pike%2C+G+B">G. Bruce Pike</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+F">Feng Liu</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+H">Hongfu Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.07823v2-abstract-short" style="display: inline;"> Quantitative susceptibility mapping (QSM) is a post-processing technique for deriving tissue magnetic susceptibility distribution from MRI phase measurements. Deep learning (DL) algorithms hold great potential for solving the ill-posed QSM reconstruction problem. However, a significant challenge facing current DL-QSM approaches is their limited adaptability to magnetic dipole field orientation var… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.07823v2-abstract-full').style.display = 'inline'; document.getElementById('2311.07823v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.07823v2-abstract-full" style="display: none;"> Quantitative susceptibility mapping (QSM) is a post-processing technique for deriving tissue magnetic susceptibility distribution from MRI phase measurements. Deep learning (DL) algorithms hold great potential for solving the ill-posed QSM reconstruction problem. However, a significant challenge facing current DL-QSM approaches is their limited adaptability to magnetic dipole field orientation variations during training and testing. In this work, we propose a novel Orientation-Adaptive Latent Feature Editing (OA-LFE) module to learn the encoding of acquisition orientation vectors and seamlessly integrate them into the latent features of deep networks. Importantly, it can be directly Plug-and-Play (PnP) into various existing DL-QSM architectures, enabling reconstructions of QSM from arbitrary magnetic dipole orientations. Its effectiveness is demonstrated by combining the OA-LFE module into our previously proposed phase-to-susceptibility single-step instant QSM (iQSM) network, which was initially tailored for pure-axial acquisitions. The proposed OA-LFE-empowered iQSM, which we refer to as iQSM+, is trained in a self-supervised manner on a specially-designed simulation brain dataset. Comprehensive experiments are conducted on simulated and in vivo human brain datasets, encompassing subjects ranging from healthy individuals to those with pathological conditions. These experiments involve various MRI platforms (3T and 7T) and aim to compare our proposed iQSM+ against several established QSM reconstruction frameworks, including the original iQSM. The iQSM+ yields QSM images with significantly improved accuracies and mitigates artifacts, surpassing other state-of-the-art DL-QSM algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.07823v2-abstract-full').style.display = 'none'; document.getElementById('2311.07823v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13pages, 9figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.06523">arXiv:2311.06523</a> <span> [<a href="https://arxiv.org/pdf/2311.06523">pdf</a>, <a href="https://arxiv.org/ps/2311.06523">ps</a>, <a href="https://arxiv.org/format/2311.06523">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/MWC.016.2300547">10.1109/MWC.016.2300547 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Generative AI for Space-Air-Ground Integrated Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhang%2C+R">Ruichen Zhang</a>, <a href="/search/eess?searchtype=author&query=Du%2C+H">Hongyang Du</a>, <a href="/search/eess?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/eess?searchtype=author&query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zehui Xiong</a>, <a href="/search/eess?searchtype=author&query=Jamalipour%2C+A">Abbas Jamalipour</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+P">Ping Zhang</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+D+I">Dong In Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.06523v2-abstract-short" style="display: inline;"> Recently, generative AI technologies have emerged as a significant advancement in artificial intelligence field, renowned for their language and image generation capabilities. Meantime, space-air-ground integrated network (SAGIN) is an integral part of future B5G/6G for achieving ubiquitous connectivity. Inspired by this, this article explores an integration of generative AI in SAGIN, focusing on… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.06523v2-abstract-full').style.display = 'inline'; document.getElementById('2311.06523v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.06523v2-abstract-full" style="display: none;"> Recently, generative AI technologies have emerged as a significant advancement in artificial intelligence field, renowned for their language and image generation capabilities. Meantime, space-air-ground integrated network (SAGIN) is an integral part of future B5G/6G for achieving ubiquitous connectivity. Inspired by this, this article explores an integration of generative AI in SAGIN, focusing on potential applications and case study. We first provide a comprehensive review of SAGIN and generative AI models, highlighting their capabilities and opportunities of their integration. Benefiting from generative AI's ability to generate useful data and facilitate advanced decision-making processes, it can be applied to various scenarios of SAGIN. Accordingly, we present a concise survey on their integration, including channel modeling and channel state information (CSI) estimation, joint air-space-ground resource allocation, intelligent network deployment, semantic communications, image extraction and processing, security and privacy enhancement. Next, we propose a framework that utilizes a Generative Diffusion Model (GDM) to construct channel information map to enhance quality of service for SAGIN. Simulation results demonstrate the effectiveness of the proposed framework. Finally, we discuss potential research directions for generative AI-enabled SAGIN. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.06523v2-abstract-full').style.display = 'none'; document.getElementById('2311.06523v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 3 figures, Accepted at IEEE Wireless Communications</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.02616">arXiv:2309.02616</a> <span> [<a href="https://arxiv.org/pdf/2309.02616">pdf</a>, <a href="https://arxiv.org/format/2309.02616">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Generative AI-aided Joint Training-free Secure Semantic Communications via Multi-modal Prompts </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Du%2C+H">Hongyang Du</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+G">Guangyuan Liu</a>, <a href="/search/eess?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+J">Jiayi Zhang</a>, <a href="/search/eess?searchtype=author&query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zehui Xiong</a>, <a href="/search/eess?searchtype=author&query=Ai%2C+B">Bo Ai</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+D+I">Dong In Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.02616v1-abstract-short" style="display: inline;"> Semantic communication (SemCom) holds promise for reducing network resource consumption while achieving the communications goal. However, the computational overheads in jointly training semantic encoders and decoders-and the subsequent deployment in network devices-are overlooked. Recent advances in Generative artificial intelligence (GAI) offer a potential solution. The robust learning abilities… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.02616v1-abstract-full').style.display = 'inline'; document.getElementById('2309.02616v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.02616v1-abstract-full" style="display: none;"> Semantic communication (SemCom) holds promise for reducing network resource consumption while achieving the communications goal. However, the computational overheads in jointly training semantic encoders and decoders-and the subsequent deployment in network devices-are overlooked. Recent advances in Generative artificial intelligence (GAI) offer a potential solution. The robust learning abilities of GAI models indicate that semantic decoders can reconstruct source messages using a limited amount of semantic information, e.g., prompts, without joint training with the semantic encoder. A notable challenge, however, is the instability introduced by GAI's diverse generation ability. This instability, evident in outputs like text-generated images, limits the direct application of GAI in scenarios demanding accurate message recovery, such as face image transmission. To solve the above problems, this paper proposes a GAI-aided SemCom system with multi-model prompts for accurate content decoding. Moreover, in response to security concerns, we introduce the application of covert communications aided by a friendly jammer. The system jointly optimizes the diffusion step, jamming, and transmitting power with the aid of the generative diffusion models, enabling successful and secure transmission of the source messages. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.02616v1-abstract-full').style.display = 'none'; document.getElementById('2309.02616v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.01426">arXiv:2309.01426</a> <span> [<a href="https://arxiv.org/pdf/2309.01426">pdf</a>, <a href="https://arxiv.org/format/2309.01426">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> A Unified Framework for Guiding Generative AI with Wireless Perception in Resource Constrained Mobile Edge Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/eess?searchtype=author&query=Du%2C+H">Hongyang Du</a>, <a href="/search/eess?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/eess?searchtype=author&query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zehui Xiong</a>, <a href="/search/eess?searchtype=author&query=Rajan%2C+D">Deepu Rajan</a>, <a href="/search/eess?searchtype=author&query=Mao%2C+S">Shiwen Mao</a>, <a href="/search/eess?searchtype=author&query=Xuemin"> Xuemin</a>, <a href="/search/eess?searchtype=author&query=Shen"> Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.01426v1-abstract-short" style="display: inline;"> With the significant advancements in artificial intelligence (AI) technologies and powerful computational capabilities, generative AI (GAI) has become a pivotal digital content generation technique for offering superior digital services. However, directing GAI towards desired outputs still suffer the inherent instability of the AI model. In this paper, we design a novel framework that utilizes wir… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.01426v1-abstract-full').style.display = 'inline'; document.getElementById('2309.01426v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.01426v1-abstract-full" style="display: none;"> With the significant advancements in artificial intelligence (AI) technologies and powerful computational capabilities, generative AI (GAI) has become a pivotal digital content generation technique for offering superior digital services. However, directing GAI towards desired outputs still suffer the inherent instability of the AI model. In this paper, we design a novel framework that utilizes wireless perception to guide GAI (WiPe-GAI) for providing digital content generation service, i.e., AI-generated content (AIGC), in resource-constrained mobile edge networks. Specifically, we first propose a new sequential multi-scale perception (SMSP) algorithm to predict user skeleton based on the channel state information (CSI) extracted from wireless signals. This prediction then guides GAI to provide users with AIGC, such as virtual character generation. To ensure the efficient operation of the proposed framework in resource constrained networks, we further design a pricing-based incentive mechanism and introduce a diffusion model based approach to generate an optimal pricing strategy for the service provisioning. The strategy maximizes the user's utility while enhancing the participation of the virtual service provider (VSP) in AIGC provision. The experimental results demonstrate the effectiveness of the designed framework in terms of skeleton prediction and optimal pricing strategy generation comparing with other existing solutions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.01426v1-abstract-full').style.display = 'none'; document.getElementById('2309.01426v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.01297">arXiv:2309.01297</a> <span> [<a href="https://arxiv.org/pdf/2309.01297">pdf</a>, <a href="https://arxiv.org/format/2309.01297">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Communication-Efficient Design of Learning System for Energy Demand Forecasting of Electrical Vehicles </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xu%2C+J">Jiacong Xu</a>, <a href="/search/eess?searchtype=author&query=Kilfoyle%2C+R">Riley Kilfoyle</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zixiang Xiong</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+L">Ligang Lu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.01297v1-abstract-short" style="display: inline;"> Machine learning (ML) applications to time series energy utilization forecasting problems are a challenging assignment due to a variety of factors. Chief among these is the non-homogeneity of the energy utilization datasets and the geographical dispersion of energy consumers. Furthermore, these ML models require vast amounts of training data and communications overhead in order to develop an effec… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.01297v1-abstract-full').style.display = 'inline'; document.getElementById('2309.01297v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.01297v1-abstract-full" style="display: none;"> Machine learning (ML) applications to time series energy utilization forecasting problems are a challenging assignment due to a variety of factors. Chief among these is the non-homogeneity of the energy utilization datasets and the geographical dispersion of energy consumers. Furthermore, these ML models require vast amounts of training data and communications overhead in order to develop an effective model. In this paper, we propose a communication-efficient time series forecasting model combining the most recent advancements in transformer architectures implemented across a geographically dispersed series of EV charging stations and an efficient variant of federated learning (FL) to enable distributed training. The time series prediction performance and communication overhead cost of our FL are compared against their counterpart models and shown to have parity in performance while consuming significantly lower data rates during training. Additionally, the comparison is made across EV charging as well as other time series datasets to demonstrate the flexibility of our proposed model in generalized time series prediction beyond energy demand. The source code for this work is available at https://github.com/XuJiacong/LoGTST_PSGF <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.01297v1-abstract-full').style.display = 'none'; document.getElementById('2309.01297v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.15394">arXiv:2308.15394</a> <span> [<a href="https://arxiv.org/pdf/2308.15394">pdf</a>, <a href="https://arxiv.org/format/2308.15394">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Decentralized Multi-agent Reinforcement Learning based State-of-Charge Balancing Strategy for Distributed Energy Storage System </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zheng Xiong</a>, <a href="/search/eess?searchtype=author&query=Luo%2C+B">Biao Luo</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+B">Bing-Chuan Wang</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+X">Xiaodong Xu</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+X">Xiaodong Liu</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+T">Tingwen Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.15394v1-abstract-short" style="display: inline;"> This paper develops a Decentralized Multi-Agent Reinforcement Learning (Dec-MARL) method to solve the SoC balancing problem in the distributed energy storage system (DESS). First, the SoC balancing problem is formulated into a finite Markov decision process with action constraints derived from demand balance, which can be solved by Dec-MARL. Specifically, the first-order average consensus algorith… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.15394v1-abstract-full').style.display = 'inline'; document.getElementById('2308.15394v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.15394v1-abstract-full" style="display: none;"> This paper develops a Decentralized Multi-Agent Reinforcement Learning (Dec-MARL) method to solve the SoC balancing problem in the distributed energy storage system (DESS). First, the SoC balancing problem is formulated into a finite Markov decision process with action constraints derived from demand balance, which can be solved by Dec-MARL. Specifically, the first-order average consensus algorithm is utilized to expand the observations of the DESS state in a fully-decentralized way, and the initial actions (i.e., output power) are decided by the agents (i.e., energy storage units) according to these observations. In order to get the final actions in the allowable range, a counterfactual demand balance algorithm is proposed to balance the total demand and the initial actions. Next, the agents execute the final actions and get local rewards from the environment, and the DESS steps into the next state. Finally, through the first-order average consensus algorithm, the agents get the average reward and the expended observation of the next state for later training. By the above procedure, Dec-MARL reveals outstanding performance in a fully-decentralized system without any expert experience or constructing any complicated model. Besides, it is flexible and can be extended to other decentralized multi-agent systems straightforwardly. Extensive simulations have validated the effectiveness and efficiency of Dec-MARL. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.15394v1-abstract-full').style.display = 'none'; document.getElementById('2308.15394v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.13736">arXiv:2308.13736</a> <span> [<a href="https://arxiv.org/pdf/2308.13736">pdf</a>, <a href="https://arxiv.org/format/2308.13736">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> A Comprehensive Survey for Evaluation Methodologies of AI-Generated Music </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zeyu Xiong</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+W">Weitao Wang</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+J">Jing Yu</a>, <a href="/search/eess?searchtype=author&query=Lin%2C+Y">Yue Lin</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Z">Ziyan Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.13736v1-abstract-short" style="display: inline;"> In recent years, AI-generated music has made significant progress, with several models performing well in multimodal and complex musical genres and scenes. While objective metrics can be used to evaluate generative music, they often lack interpretability for musical evaluation. Therefore, researchers often resort to subjective user studies to assess the quality of the generated works, which can be… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.13736v1-abstract-full').style.display = 'inline'; document.getElementById('2308.13736v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.13736v1-abstract-full" style="display: none;"> In recent years, AI-generated music has made significant progress, with several models performing well in multimodal and complex musical genres and scenes. While objective metrics can be used to evaluate generative music, they often lack interpretability for musical evaluation. Therefore, researchers often resort to subjective user studies to assess the quality of the generated works, which can be resource-intensive and less reproducible than objective metrics. This study aims to comprehensively evaluate the subjective, objective, and combined methodologies for assessing AI-generated music, highlighting the advantages and disadvantages of each approach. Ultimately, this study provides a valuable reference for unifying generative AI in the field of music evaluation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.13736v1-abstract-full').style.display = 'none'; document.getElementById('2308.13736v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.09467">arXiv:2308.09467</a> <span> [<a href="https://arxiv.org/pdf/2308.09467">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Quantitative Susceptibility Mapping through Model-based Deep Image Prior (MoDIP) </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zhuang Xiong</a>, <a href="/search/eess?searchtype=author&query=Gao%2C+Y">Yang Gao</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Y">Yin Liu</a>, <a href="/search/eess?searchtype=author&query=Fazlollahi%2C+A">Amir Fazlollahi</a>, <a href="/search/eess?searchtype=author&query=Nestor%2C+P">Peter Nestor</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+F">Feng Liu</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+H">Hongfu Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.09467v1-abstract-short" style="display: inline;"> The data-driven approach of supervised learning methods has limited applicability in solving dipole inversion in Quantitative Susceptibility Mapping (QSM) with varying scan parameters across different objects. To address this generalization issue in supervised QSM methods, we propose a novel training-free model-based unsupervised method called MoDIP (Model-based Deep Image Prior). MoDIP comprises… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.09467v1-abstract-full').style.display = 'inline'; document.getElementById('2308.09467v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.09467v1-abstract-full" style="display: none;"> The data-driven approach of supervised learning methods has limited applicability in solving dipole inversion in Quantitative Susceptibility Mapping (QSM) with varying scan parameters across different objects. To address this generalization issue in supervised QSM methods, we propose a novel training-free model-based unsupervised method called MoDIP (Model-based Deep Image Prior). MoDIP comprises a small, untrained network and a Data Fidelity Optimization (DFO) module. The network converges to an interim state, acting as an implicit prior for image regularization, while the optimization process enforces the physical model of QSM dipole inversion. Experimental results demonstrate MoDIP's excellent generalizability in solving QSM dipole inversion across different scan parameters. It exhibits robustness against pathological brain QSM, achieving over 32% accuracy improvement than supervised deep learning and traditional iterative methods. It is also 33% more computationally efficient and runs 4 times faster than conventional DIP-based approaches, enabling 3D high-resolution image reconstruction in under 4.5 minutes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.09467v1-abstract-full').style.display = 'none'; document.getElementById('2308.09467v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.07618">arXiv:2308.07618</a> <span> [<a href="https://arxiv.org/pdf/2308.07618">pdf</a>, <a href="https://arxiv.org/format/2308.07618">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Vision-based Semantic Communications for Metaverse Services: A Contest Theoretic Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Liu%2C+G">Guangyuan Liu</a>, <a href="/search/eess?searchtype=author&query=Du%2C+H">Hongyang Du</a>, <a href="/search/eess?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/eess?searchtype=author&query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zehui Xiong</a>, <a href="/search/eess?searchtype=author&query=Soong%2C+B+H">Boon Hee Soong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.07618v1-abstract-short" style="display: inline;"> The popularity of Metaverse as an entertainment, social, and work platform has led to a great need for seamless avatar integration in the virtual world. In Metaverse, avatars must be updated and rendered to reflect users' behaviour. Achieving real-time synchronization between the virtual bilocation and the user is complex, placing high demands on the Metaverse Service Provider (MSP)'s rendering re… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.07618v1-abstract-full').style.display = 'inline'; document.getElementById('2308.07618v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.07618v1-abstract-full" style="display: none;"> The popularity of Metaverse as an entertainment, social, and work platform has led to a great need for seamless avatar integration in the virtual world. In Metaverse, avatars must be updated and rendered to reflect users' behaviour. Achieving real-time synchronization between the virtual bilocation and the user is complex, placing high demands on the Metaverse Service Provider (MSP)'s rendering resource allocation scheme. To tackle this issue, we propose a semantic communication framework that leverages contest theory to model the interactions between users and MSPs and determine optimal resource allocation for each user. To reduce the consumption of network resources in wireless transmission, we use the semantic communication technique to reduce the amount of data to be transmitted. Under our simulation settings, the encoded semantic data only contains 51 bytes of skeleton coordinates instead of the image size of 8.243 megabytes. Moreover, we implement Deep Q-Network to optimize reward settings for maximum performance and efficient resource allocation. With the optimal reward setting, users are incentivized to select their respective suitable uploading frequency, reducing down-sampling loss due to rendering resource constraints by 66.076\% compared with the traditional average distribution method. The framework provides a novel solution to resource allocation for avatar association in VR environments, ensuring a smooth and immersive experience for all users. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.07618v1-abstract-full').style.display = 'none'; document.getElementById('2308.07618v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages,7figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.05384">arXiv:2308.05384</a> <span> [<a href="https://arxiv.org/pdf/2308.05384">pdf</a>, <a href="https://arxiv.org/format/2308.05384">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Deep Reinforcement Learning: A Tutorial on Generative Diffusion Models in Network Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Du%2C+H">Hongyang Du</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+R">Ruichen Zhang</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Y">Yinqiu Liu</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/eess?searchtype=author&query=Lin%2C+Y">Yijing Lin</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Z">Zonghang Li</a>, <a href="/search/eess?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/eess?searchtype=author&query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zehui Xiong</a>, <a href="/search/eess?searchtype=author&query=Cui%2C+S">Shuguang Cui</a>, <a href="/search/eess?searchtype=author&query=Ai%2C+B">Bo Ai</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+H">Haibo Zhou</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+D+I">Dong In Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.05384v2-abstract-short" style="display: inline;"> Generative Diffusion Models (GDMs) have emerged as a transformative force in the realm of Generative Artificial Intelligence (GenAI), demonstrating their versatility and efficacy across various applications. The ability to model complex data distributions and generate high-quality samples has made GDMs particularly effective in tasks such as image generation and reinforcement learning. Furthermore… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.05384v2-abstract-full').style.display = 'inline'; document.getElementById('2308.05384v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.05384v2-abstract-full" style="display: none;"> Generative Diffusion Models (GDMs) have emerged as a transformative force in the realm of Generative Artificial Intelligence (GenAI), demonstrating their versatility and efficacy across various applications. The ability to model complex data distributions and generate high-quality samples has made GDMs particularly effective in tasks such as image generation and reinforcement learning. Furthermore, their iterative nature, which involves a series of noise addition and denoising steps, is a powerful and unique approach to learning and generating data. This paper serves as a comprehensive tutorial on applying GDMs in network optimization tasks. We delve into the strengths of GDMs, emphasizing their wide applicability across various domains, such as vision, text, and audio generation. We detail how GDMs can be effectively harnessed to solve complex optimization problems inherent in networks. The paper first provides a basic background of GDMs and their applications in network optimization. This is followed by a series of case studies, showcasing the integration of GDMs with Deep Reinforcement Learning (DRL), incentive mechanism design, Semantic Communications (SemCom), Internet of Vehicles (IoV) networks, etc. These case studies underscore the practicality and efficacy of GDMs in real-world scenarios, offering insights into network design. We conclude with a discussion on potential future directions for GDM research and applications, providing major insights into how they can continue to shape the future of network optimization. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.05384v2-abstract-full').style.display = 'none'; document.getElementById('2308.05384v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been accepted by IEEE Communications Surveys & Tutorials (COMST)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.10974">arXiv:2307.10974</a> <span> [<a href="https://arxiv.org/pdf/2307.10974">pdf</a>, <a href="https://arxiv.org/format/2307.10974">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Deep Multi-Threshold Spiking-UNet for Image Processing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+H">Hebei Li</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Y">Yueyi Zhang</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zhiwei Xiong</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+X">Xiaoyan Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.10974v4-abstract-short" style="display: inline;"> U-Net, known for its simple yet efficient architecture, is widely utilized for image processing tasks and is particularly suitable for deployment on neuromorphic chips. This paper introduces the novel concept of Spiking-UNet for image processing, which combines the power of Spiking Neural Networks (SNNs) with the U-Net architecture. To achieve an efficient Spiking-UNet, we face two primary challen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.10974v4-abstract-full').style.display = 'inline'; document.getElementById('2307.10974v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.10974v4-abstract-full" style="display: none;"> U-Net, known for its simple yet efficient architecture, is widely utilized for image processing tasks and is particularly suitable for deployment on neuromorphic chips. This paper introduces the novel concept of Spiking-UNet for image processing, which combines the power of Spiking Neural Networks (SNNs) with the U-Net architecture. To achieve an efficient Spiking-UNet, we face two primary challenges: ensuring high-fidelity information propagation through the network via spikes and formulating an effective training strategy. To address the issue of information loss, we introduce multi-threshold spiking neurons, which improve the efficiency of information transmission within the Spiking-UNet. For the training strategy, we adopt a conversion and fine-tuning pipeline that leverage pre-trained U-Net models. During the conversion process, significant variability in data distribution across different parts is observed when utilizing skip connections. Therefore, we propose a connection-wise normalization method to prevent inaccurate firing rates. Furthermore, we adopt a flow-based training method to fine-tune the converted models, reducing time steps while preserving performance. Experimental results show that, on image segmentation and denoising, our Spiking-UNet achieves comparable performance to its non-spiking counterpart, surpassing existing SNN methods. Compared with the converted Spiking-UNet without fine-tuning, our Spiking-UNet reduces inference time by approximately 90\%. This research broadens the application scope of SNNs in image processing and is expected to inspire further exploration in the field of neuromorphic engineering. The code for our Spiking-UNet implementation is available at https://github.com/SNNresearch/Spiking-UNet. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.10974v4-abstract-full').style.display = 'none'; document.getElementById('2307.10974v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in NeuroComputing</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.14683">arXiv:2306.14683</a> <span> [<a href="https://arxiv.org/pdf/2306.14683">pdf</a>, <a href="https://arxiv.org/format/2306.14683">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Multi-Agent Deep Reinforcement Learning for Dynamic Avatar Migration in AIoT-enabled Vehicular Metaverses with Trajectory Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+J">Junlong Chen</a>, <a href="/search/eess?searchtype=author&query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+M">Minrui Xu</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zehui Xiong</a>, <a href="/search/eess?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+C">Chuan Chen</a>, <a href="/search/eess?searchtype=author&query=Jamalipour%2C+A">Abbas Jamalipour</a>, <a href="/search/eess?searchtype=author&query=Xie%2C+S">Shengli Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.14683v1-abstract-short" style="display: inline;"> Avatars, as promising digital assistants in Vehicular Metaverses, can enable drivers and passengers to immerse in 3D virtual spaces, serving as a practical emerging example of Artificial Intelligence of Things (AIoT) in intelligent vehicular environments. The immersive experience is achieved through seamless human-avatar interaction, e.g., augmented reality navigation, which requires intensive res… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.14683v1-abstract-full').style.display = 'inline'; document.getElementById('2306.14683v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.14683v1-abstract-full" style="display: none;"> Avatars, as promising digital assistants in Vehicular Metaverses, can enable drivers and passengers to immerse in 3D virtual spaces, serving as a practical emerging example of Artificial Intelligence of Things (AIoT) in intelligent vehicular environments. The immersive experience is achieved through seamless human-avatar interaction, e.g., augmented reality navigation, which requires intensive resources that are inefficient and impractical to process on intelligent vehicles locally. Fortunately, offloading avatar tasks to RoadSide Units (RSUs) or cloud servers for remote execution can effectively reduce resource consumption. However, the high mobility of vehicles, the dynamic workload of RSUs, and the heterogeneity of RSUs pose novel challenges to making avatar migration decisions. To address these challenges, in this paper, we propose a dynamic migration framework for avatar tasks based on real-time trajectory prediction and Multi-Agent Deep Reinforcement Learning (MADRL). Specifically, we propose a model to predict the future trajectories of intelligent vehicles based on their historical data, indicating the future workloads of RSUs.Based on the expected workloads of RSUs, we formulate the avatar task migration problem as a long-term mixed integer programming problem. To tackle this problem efficiently, the problem is transformed into a Partially Observable Markov Decision Process (POMDP) and solved by multiple DRL agents with hybrid continuous and discrete actions in decentralized. Numerical results demonstrate that our proposed algorithm can effectively reduce the latency of executing avatar tasks by around 25% without prediction and 30% with prediction and enhance user immersive experiences in the AIoT-enabled Vehicular Metaverse (AeVeM). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.14683v1-abstract-full').style.display = 'none'; document.getElementById('2306.14683v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.12675">arXiv:2306.12675</a> <span> [<a href="https://arxiv.org/pdf/2306.12675">pdf</a>, <a href="https://arxiv.org/format/2306.12675">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> STAR-RIS-Assisted Privacy Protection in Semantic Communication System </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yiru Wang</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+W">Wanting Yang</a>, <a href="/search/eess?searchtype=author&query=Guan%2C+P">Pengxin Guan</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+Y">Yuping Zhao</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zehui Xiong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.12675v1-abstract-short" style="display: inline;"> Semantic communication (SemCom) has emerged as a promising architecture in the realm of intelligent communication paradigms. SemCom involves extracting and compressing the core information at the transmitter while enabling the receiver to interpret it based on established knowledge bases (KBs). This approach enhances communication efficiency greatly. However, the open nature of wireless transmissi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.12675v1-abstract-full').style.display = 'inline'; document.getElementById('2306.12675v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.12675v1-abstract-full" style="display: none;"> Semantic communication (SemCom) has emerged as a promising architecture in the realm of intelligent communication paradigms. SemCom involves extracting and compressing the core information at the transmitter while enabling the receiver to interpret it based on established knowledge bases (KBs). This approach enhances communication efficiency greatly. However, the open nature of wireless transmission and the presence of homogeneous KBs among subscribers of identical data type pose a risk of privacy leakage in SemCom. To address this challenge, we propose to leverage the simultaneous transmitting and reflecting reconfigurable intelligent surface (STAR-RIS) to achieve privacy protection in a SemCom system. In this system, the STAR-RIS is utilized to enhance the signal transmission of the SemCom between a base station and a destination user, as well as to covert the signal to interference specifically for the eavesdropper (Eve). Simulation results demonstrate that our generated task-level disturbance outperforms other benchmarks in protecting SemCom privacy, as evidenced by the significantly lower task success rate achieved by Eve. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.12675v1-abstract-full').style.display = 'none'; document.getElementById('2306.12675v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.18994">arXiv:2305.18994</a> <span> [<a href="https://arxiv.org/pdf/2305.18994">pdf</a>, <a href="https://arxiv.org/format/2305.18994">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Toward Real-World Light Field Super-Resolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xiao%2C+Z">Zeyu Xiao</a>, <a href="/search/eess?searchtype=author&query=Gao%2C+R">Ruisheng Gao</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Y">Yutong Liu</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Y">Yueyi Zhang</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zhiwei Xiong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.18994v1-abstract-short" style="display: inline;"> Deep learning has opened up new possibilities for light field super-resolution (SR), but existing methods trained on synthetic datasets with simple degradations (e.g., bicubic downsampling) suffer from poor performance when applied to complex real-world scenarios. To address this problem, we introduce LytroZoom, the first real-world light field SR dataset capturing paired low- and high-resolution… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.18994v1-abstract-full').style.display = 'inline'; document.getElementById('2305.18994v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.18994v1-abstract-full" style="display: none;"> Deep learning has opened up new possibilities for light field super-resolution (SR), but existing methods trained on synthetic datasets with simple degradations (e.g., bicubic downsampling) suffer from poor performance when applied to complex real-world scenarios. To address this problem, we introduce LytroZoom, the first real-world light field SR dataset capturing paired low- and high-resolution light fields of diverse indoor and outdoor scenes using a Lytro ILLUM camera. Additionally, we propose the Omni-Frequency Projection Network (OFPNet), which decomposes the omni-frequency components and iteratively enhances them through frequency projection operations to address spatially variant degradation processes present in all frequency components. Experiments demonstrate that models trained on LytroZoom outperform those trained on synthetic datasets and are generalizable to diverse content and devices. Quantitative and qualitative evaluations verify the superiority of OFPNet. We believe this work will inspire future research in real-world light field SR. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.18994v1-abstract-full').style.display = 'none'; document.getElementById('2305.18994v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">CVPRW 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.14506">arXiv:2303.14506</a> <span> [<a href="https://arxiv.org/pdf/2303.14506">pdf</a>, <a href="https://arxiv.org/format/2303.14506">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TPAMI.2024.3401048">10.1109/TPAMI.2024.3401048 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Toward DNN of LUTs: Learning Efficient Image Restoration with Multiple Look-Up Tables </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+J">Jiacheng Li</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+C">Chang Chen</a>, <a href="/search/eess?searchtype=author&query=Cheng%2C+Z">Zhen Cheng</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zhiwei Xiong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.14506v1-abstract-short" style="display: inline;"> The widespread usage of high-definition screens on edge devices stimulates a strong demand for efficient image restoration algorithms. The way of caching deep learning models in a look-up table (LUT) is recently introduced to respond to this demand. However, the size of a single LUT grows exponentially with the increase of its indexing capacity, which restricts its receptive field and thus the per… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.14506v1-abstract-full').style.display = 'inline'; document.getElementById('2303.14506v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.14506v1-abstract-full" style="display: none;"> The widespread usage of high-definition screens on edge devices stimulates a strong demand for efficient image restoration algorithms. The way of caching deep learning models in a look-up table (LUT) is recently introduced to respond to this demand. However, the size of a single LUT grows exponentially with the increase of its indexing capacity, which restricts its receptive field and thus the performance. To overcome this intrinsic limitation of the single-LUT solution, we propose a universal method to construct multiple LUTs like a neural network, termed MuLUT. Firstly, we devise novel complementary indexing patterns, as well as a general implementation for arbitrary patterns, to construct multiple LUTs in parallel. Secondly, we propose a re-indexing mechanism to enable hierarchical indexing between cascaded LUTs. Finally, we introduce channel indexing to allow cross-channel interaction, enabling LUTs to process color channels jointly. In these principled ways, the total size of MuLUT is linear to its indexing capacity, yielding a practical solution to obtain superior performance with the enlarged receptive field. We examine the advantage of MuLUT on various image restoration tasks, including super-resolution, demosaicing, denoising, and deblocking. MuLUT achieves a significant improvement over the single-LUT solution, e.g., up to 1.1dB PSNR for super-resolution and up to 2.8dB PSNR for grayscale denoising, while preserving its efficiency, which is 100$\times$ less in energy cost compared with lightweight deep neural networks. Our code and trained models are publicly available at https://github.com/ddlee-cn/MuLUT. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.14506v1-abstract-full').style.display = 'none'; document.getElementById('2303.14506v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project Page: https://mulut.pages.dev/</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Pattern Analysis and Machine Intelligence (T-PAMI), 2024, early access </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.01896">arXiv:2303.01896</a> <span> [<a href="https://arxiv.org/pdf/2303.01896">pdf</a>, <a href="https://arxiv.org/format/2303.01896">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> AI-Generated Incentive Mechanism and Full-Duplex Semantic Communications for Information Sharing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Du%2C+H">Hongyang Du</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/eess?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/eess?searchtype=author&query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zehui Xiong</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+D+I">Dong In Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.01896v2-abstract-short" style="display: inline;"> The next generation of Internet services, such as Metaverse, rely on mixed reality (MR) technology to provide immersive user experiences. However, the limited computation power of MR headset-mounted devices (HMDs) hinders the deployment of such services. Therefore, we propose an efficient information sharing scheme based on full-duplex device-to-device (D2D) semantic communications to address this… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.01896v2-abstract-full').style.display = 'inline'; document.getElementById('2303.01896v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.01896v2-abstract-full" style="display: none;"> The next generation of Internet services, such as Metaverse, rely on mixed reality (MR) technology to provide immersive user experiences. However, the limited computation power of MR headset-mounted devices (HMDs) hinders the deployment of such services. Therefore, we propose an efficient information sharing scheme based on full-duplex device-to-device (D2D) semantic communications to address this issue. Our approach enables users to avoid heavy and repetitive computational tasks, such as artificial intelligence-generated content (AIGC) in the view images of all MR users. Specifically, a user can transmit the generated content and semantic information extracted from their view image to nearby users, who can then use this information to obtain the spatial matching of computation results under their view images. We analyze the performance of full-duplex D2D communications, including the achievable rate and bit error probability, by using generalized small-scale fading models. To facilitate semantic information sharing among users, we design a contract theoretic AI-generated incentive mechanism. The proposed diffusion model generates the optimal contract design, outperforming two deep reinforcement learning algorithms, i.e., proximal policy optimization and soft actor-critic algorithms. Our numerical analysis experiment proves the effectiveness of our proposed methods. The code for this paper is available at https://github.com/HongyangDu/SemSharing <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.01896v2-abstract-full').style.display = 'none'; document.getElementById('2303.01896v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by IEEE JSAC</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.01346">arXiv:2303.01346</a> <span> [<a href="https://arxiv.org/pdf/2303.01346">pdf</a>, <a href="https://arxiv.org/format/2303.01346">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Co-learning Planning and Control Policies Constrained by Differentiable Logic Specifications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zikang Xiong</a>, <a href="/search/eess?searchtype=author&query=Lawson%2C+D">Daniel Lawson</a>, <a href="/search/eess?searchtype=author&query=Eappen%2C+J">Joe Eappen</a>, <a href="/search/eess?searchtype=author&query=Qureshi%2C+A+H">Ahmed H. Qureshi</a>, <a href="/search/eess?searchtype=author&query=Jagannathan%2C+S">Suresh Jagannathan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.01346v3-abstract-short" style="display: inline;"> Synthesizing planning and control policies in robotics is a fundamental task, further complicated by factors such as complex logic specifications and high-dimensional robot dynamics. This paper presents a novel reinforcement learning approach to solving high-dimensional robot navigation tasks with complex logic specifications by co-learning planning and control policies. Notably, this approach sig… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.01346v3-abstract-full').style.display = 'inline'; document.getElementById('2303.01346v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.01346v3-abstract-full" style="display: none;"> Synthesizing planning and control policies in robotics is a fundamental task, further complicated by factors such as complex logic specifications and high-dimensional robot dynamics. This paper presents a novel reinforcement learning approach to solving high-dimensional robot navigation tasks with complex logic specifications by co-learning planning and control policies. Notably, this approach significantly reduces the sample complexity in training, allowing us to train high-quality policies with much fewer samples compared to existing reinforcement learning algorithms. In addition, our methodology streamlines complex specification extraction from map images and enables the efficient generation of long-horizon robot motion paths across different map layouts. Moreover, our approach also demonstrates capabilities for high-dimensional control and avoiding suboptimal policies via policy alignment. The efficacy of our approach is demonstrated through experiments involving simulated high-dimensional quadruped robot dynamics and a real-world differential drive robot (TurtleBot3) under different types of task specifications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.01346v3-abstract-full').style.display = 'none'; document.getElementById('2303.01346v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2301.03220">arXiv:2301.03220</a> <span> [<a href="https://arxiv.org/pdf/2301.03220">pdf</a>, <a href="https://arxiv.org/format/2301.03220">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Enabling AI-Generated Content (AIGC) Services in Wireless Edge Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Du%2C+H">Hongyang Du</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Z">Zonghang Li</a>, <a href="/search/eess?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/eess?searchtype=author&query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zehui Xiong</a>, <a href="/search/eess?searchtype=author&query=Xuemin"> Xuemin</a>, <a href="/search/eess?searchtype=author&query=Shen"> Shen</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+D+I">Dong In Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2301.03220v1-abstract-short" style="display: inline;"> Artificial Intelligence-Generated Content (AIGC) refers to the use of AI to automate the information creation process while fulfilling the personalized requirements of users. However, due to the instability of AIGC models, e.g., the stochastic nature of diffusion models, the quality and accuracy of the generated content can vary significantly. In wireless edge networks, the transmission of incorre… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.03220v1-abstract-full').style.display = 'inline'; document.getElementById('2301.03220v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2301.03220v1-abstract-full" style="display: none;"> Artificial Intelligence-Generated Content (AIGC) refers to the use of AI to automate the information creation process while fulfilling the personalized requirements of users. However, due to the instability of AIGC models, e.g., the stochastic nature of diffusion models, the quality and accuracy of the generated content can vary significantly. In wireless edge networks, the transmission of incorrectly generated content may unnecessarily consume network resources. Thus, a dynamic AIGC service provider (ASP) selection scheme is required to enable users to connect to the most suited ASP, improving the users' satisfaction and quality of generated content. In this article, we first review the AIGC techniques and their applications in wireless networks. We then present the AIGC-as-a-service (AaaS) concept and discuss the challenges in deploying AaaS at the edge networks. Yet, it is essential to have performance metrics to evaluate the accuracy of AIGC services. Thus, we introduce several image-based perceived quality evaluation metrics. Then, we propose a general and effective model to illustrate the relationship between computational resources and user-perceived quality evaluation metrics. To achieve efficient AaaS and maximize the quality of generated content in wireless edge networks, we propose a deep reinforcement learning-enabled algorithm for optimal ASP selection. Simulation results show that the proposed algorithm can provide a higher quality of generated content to users and achieve fewer crashed tasks by comparing with four benchmarks, i.e., overloading-avoidance, random, round-robin policies, and the upper-bound schemes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.03220v1-abstract-full').style.display = 'none'; document.getElementById('2301.03220v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.14771">arXiv:2211.14771</a> <span> [<a href="https://arxiv.org/pdf/2211.14771">pdf</a>, <a href="https://arxiv.org/format/2211.14771">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Performance Analysis of Free-Space Information Sharing in Full-Duplex Semantic Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Du%2C+H">Hongyang Du</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/eess?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/eess?searchtype=author&query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zehui Xiong</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+D+I">Dong In Kim</a>, <a href="/search/eess?searchtype=author&query=Soong%2C+B+H">Boon Hee Soong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.14771v1-abstract-short" style="display: inline;"> In next-generation Internet services, such as Metaverse, the mixed reality (MR) technique plays a vital role. Yet the limited computing capacity of the user-side MR headset-mounted device (HMD) prevents its further application, especially in scenarios that require a lot of computation. One way out of this dilemma is to design an efficient information sharing scheme among users to replace the heavy… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.14771v1-abstract-full').style.display = 'inline'; document.getElementById('2211.14771v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.14771v1-abstract-full" style="display: none;"> In next-generation Internet services, such as Metaverse, the mixed reality (MR) technique plays a vital role. Yet the limited computing capacity of the user-side MR headset-mounted device (HMD) prevents its further application, especially in scenarios that require a lot of computation. One way out of this dilemma is to design an efficient information sharing scheme among users to replace the heavy and repetitive computation. In this paper, we propose a free-space information sharing mechanism based on full-duplex device-to-device (D2D) semantic communications. Specifically, the view images of MR users in the same real-world scenario may be analogous. Therefore, when one user (i.e., a device) completes some computation tasks, the user can send his own calculation results and the semantic features extracted from the user's own view image to nearby users (i.e., other devices). On this basis, other users can use the received semantic features to obtain the spatial matching of the computational results under their own view images without repeating the computation. Using generalized small-scale fading models, we analyze the key performance indicators of full-duplex D2D communications, including channel capacity and bit error probability, which directly affect the transmission of semantic information. Finally, the numerical analysis experiment proves the effectiveness of our proposed methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.14771v1-abstract-full').style.display = 'none'; document.getElementById('2211.14771v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.12727">arXiv:2211.12727</a> <span> [<a href="https://arxiv.org/pdf/2211.12727">pdf</a>, <a href="https://arxiv.org/format/2211.12727">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Semantic Communications for Wireless Sensing: RIS-aided Encoding and Self-supervised Decoding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Du%2C+H">Hongyang Du</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/eess?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/eess?searchtype=author&query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zehui Xiong</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+J">Junshan Zhang</a>, <a href="/search/eess?searchtype=author&query=Xuemin"> Xuemin</a>, <a href="/search/eess?searchtype=author&query=Shen"> Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.12727v2-abstract-short" style="display: inline;"> Semantic communications can reduce the resource consumption by transmitting task-related semantic information extracted from source messages. However, when the source messages are utilized for various tasks, e.g., wireless sensing data for localization and activities detection, semantic communication technique is difficult to be implemented because of the increased processing complexity. In this p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.12727v2-abstract-full').style.display = 'inline'; document.getElementById('2211.12727v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.12727v2-abstract-full" style="display: none;"> Semantic communications can reduce the resource consumption by transmitting task-related semantic information extracted from source messages. However, when the source messages are utilized for various tasks, e.g., wireless sensing data for localization and activities detection, semantic communication technique is difficult to be implemented because of the increased processing complexity. In this paper, we propose the inverse semantic communications as a new paradigm. Instead of extracting semantic information from messages, we aim to encode the task-related source messages into a hyper-source message for data transmission or storage. Following this paradigm, we design an inverse semantic-aware wireless sensing framework with three algorithms for data sampling, reconfigurable intelligent surface (RIS)-aided encoding, and self-supervised decoding, respectively. Specifically, on the one hand, we propose a novel RIS hardware design for encoding several signal spectrums into one MetaSpectrum. To select the task-related signal spectrums for achieving efficient encoding, a semantic hash sampling method is introduced. On the other hand, we propose a self-supervised learning method for decoding the MetaSpectrums to obtain the original signal spectrums. Using the sensing data collected from real-world, we show that our framework can reduce the data volume by 95% compared to that before encoding, without affecting the accomplishment of sensing tasks. Moreover, compared with the typically used uniform sampling scheme, the proposed semantic hash sampling scheme can achieve 67% lower mean squared error in recovering the sensing parameters. In addition, experiment results demonstrate that the amplitude response matrix of the RIS enables the encryption of the sensing data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.12727v2-abstract-full').style.display = 'none'; document.getElementById('2211.12727v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.03058">arXiv:2211.03058</a> <span> [<a href="https://arxiv.org/pdf/2211.03058">pdf</a>, <a href="https://arxiv.org/format/2211.03058">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Towards Real World HDRTV Reconstruction: A Data Synthesis-based Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Cheng%2C+Z">Zhen Cheng</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+T">Tao Wang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yong Li</a>, <a href="/search/eess?searchtype=author&query=Song%2C+F">Fenglong Song</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+C">Chang Chen</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zhiwei Xiong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.03058v1-abstract-short" style="display: inline;"> Existing deep learning based HDRTV reconstruction methods assume one kind of tone mapping operators (TMOs) as the degradation procedure to synthesize SDRTV-HDRTV pairs for supervised training. In this paper, we argue that, although traditional TMOs exploit efficient dynamic range compression priors, they have several drawbacks on modeling the realistic degradation: information over-preservation, c… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.03058v1-abstract-full').style.display = 'inline'; document.getElementById('2211.03058v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.03058v1-abstract-full" style="display: none;"> Existing deep learning based HDRTV reconstruction methods assume one kind of tone mapping operators (TMOs) as the degradation procedure to synthesize SDRTV-HDRTV pairs for supervised training. In this paper, we argue that, although traditional TMOs exploit efficient dynamic range compression priors, they have several drawbacks on modeling the realistic degradation: information over-preservation, color bias and possible artifacts, making the trained reconstruction networks hard to generalize well to real-world cases. To solve this problem, we propose a learning-based data synthesis approach to learn the properties of real-world SDRTVs by integrating several tone mapping priors into both network structures and loss functions. In specific, we design a conditioned two-stream network with prior tone mapping results as a guidance to synthesize SDRTVs by both global and local transformations. To train the data synthesis network, we form a novel self-supervised content loss to constraint different aspects of the synthesized SDRTVs at regions with different brightness distributions and an adversarial loss to emphasize the details to be more realistic. To validate the effectiveness of our approach, we synthesize SDRTV-HDRTV pairs with our method and use them to train several HDRTV reconstruction networks. Then we collect two inference datasets containing both labeled and unlabeled real-world SDRTVs, respectively. Experimental results demonstrate that, the networks trained with our synthesized data generalize significantly better to these two real-world datasets than existing solutions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.03058v1-abstract-full').style.display = 'none'; document.getElementById('2211.03058v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.00481">arXiv:2211.00481</a> <span> [<a href="https://arxiv.org/pdf/2211.00481">pdf</a>, <a href="https://arxiv.org/format/2211.00481">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Multi-Resource Allocation for On-Device Distributed Federated Learning Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Gao%2C+Y">Yulan Gao</a>, <a href="/search/eess?searchtype=author&query=Ye%2C+Z">Ziqiang Ye</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+H">Han Yu</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zehui Xiong</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+Y">Yue Xiao</a>, <a href="/search/eess?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.00481v1-abstract-short" style="display: inline;"> This work poses a distributed multi-resource allocation scheme for minimizing the weighted sum of latency and energy consumption in the on-device distributed federated learning (FL) system. Each mobile device in the system engages the model training process within the specified area and allocates its computation and communication resources for deriving and uploading parameters, respectively, to mi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.00481v1-abstract-full').style.display = 'inline'; document.getElementById('2211.00481v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.00481v1-abstract-full" style="display: none;"> This work poses a distributed multi-resource allocation scheme for minimizing the weighted sum of latency and energy consumption in the on-device distributed federated learning (FL) system. Each mobile device in the system engages the model training process within the specified area and allocates its computation and communication resources for deriving and uploading parameters, respectively, to minimize the objective of system subject to the computation/communication budget and a target latency requirement. In particular, mobile devices are connect via wireless TCP/IP architectures. Exploiting the optimization problem structure, the problem can be decomposed to two convex sub-problems. Drawing on the Lagrangian dual and harmony search techniques, we characterize the global optimal solution by the closed-form solutions to all sub-problems, which give qualitative insights to multi-resource tradeoff. Numerical simulations are used to validate the analysis and assess the performance of the proposed algorithm. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.00481v1-abstract-full').style.display = 'none'; document.getElementById('2211.00481v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.12995">arXiv:2210.12995</a> <span> [<a href="https://arxiv.org/pdf/2210.12995">pdf</a>, <a href="https://arxiv.org/format/2210.12995">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> TridentSE: Guiding Speech Enhancement with 32 Global Tokens </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Yin%2C+D">Dacheng Yin</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+Z">Zhiyuan Zhao</a>, <a href="/search/eess?searchtype=author&query=Tang%2C+C">Chuanxin Tang</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zhiwei Xiong</a>, <a href="/search/eess?searchtype=author&query=Luo%2C+C">Chong Luo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.12995v1-abstract-short" style="display: inline;"> In this paper, we present TridentSE, a novel architecture for speech enhancement, which is capable of efficiently capturing both global information and local details. TridentSE maintains T-F bin level representation to capture details, and uses a small number of global tokens to process the global information. Information is propagated between the local and the global representations through cross… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.12995v1-abstract-full').style.display = 'inline'; document.getElementById('2210.12995v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.12995v1-abstract-full" style="display: none;"> In this paper, we present TridentSE, a novel architecture for speech enhancement, which is capable of efficiently capturing both global information and local details. TridentSE maintains T-F bin level representation to capture details, and uses a small number of global tokens to process the global information. Information is propagated between the local and the global representations through cross attention modules. To capture both inter- and intra-frame information, the global tokens are divided into two groups to process along the time and the frequency axis respectively. A metric discriminator is further employed to guide our model to achieve higher perceptual quality. Even with significantly lower computational cost, TridentSE outperforms a variety of previous speech enhancement methods, achieving a PESQ of 3.47 on VoiceBank+DEMAND dataset and a PESQ of 3.44 on DNS no-reverb test set. Visualization shows that the global tokens learn diverse and interpretable global patterns. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.12995v1-abstract-full').style.display = 'none'; document.getElementById('2210.12995v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 2 figures, 3 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.04308">arXiv:2210.04308</a> <span> [<a href="https://arxiv.org/pdf/2210.04308">pdf</a>, <a href="https://arxiv.org/format/2210.04308">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/JSTSP.2022.3224591">10.1109/JSTSP.2022.3224591 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Privacy-preserving Intelligent Resource Allocation for Federated Edge Learning in Quantum Internet </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xu%2C+M">Minrui Xu</a>, <a href="/search/eess?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+Z">Zhaohui Yang</a>, <a href="/search/eess?searchtype=author&query=Xiong%2C+Z">Zehui Xiong</a>, <a href="/search/eess?searchtype=author&query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+D+I">Dong In Kim</a>, <a href="/search/eess?searchtype=author&query=Xuemin"> Xuemin</a>, <a href="/search/eess?searchtype=author&query=Shen"> Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.04308v1-abstract-short" style="display: inline;"> Federated edge learning (FEL) is a promising paradigm of distributed machine learning that can preserve data privacy while training the global model collaboratively. However, FEL is still facing model confidentiality issues due to eavesdropping risks of exchanging cryptographic keys through traditional encryption schemes. Therefore, in this paper, we propose a hierarchical architecture for quantum… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.04308v1-abstract-full').style.display = 'inline'; document.getElementById('2210.04308v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.04308v1-abstract-full" style="display: none;"> Federated edge learning (FEL) is a promising paradigm of distributed machine learning that can preserve data privacy while training the global model collaboratively. However, FEL is still facing model confidentiality issues due to eavesdropping risks of exchanging cryptographic keys through traditional encryption schemes. Therefore, in this paper, we propose a hierarchical architecture for quantum-secured FEL systems with ideal security based on the quantum key distribution (QKD) to facilitate public key and model encryption against eavesdropping attacks. Specifically, we propose a stochastic resource allocation model for efficient QKD to encrypt FEL keys and models. In FEL systems, remote FEL workers are connected to cluster heads via quantum-secured channels to train an aggregated global model collaboratively. However, due to the unpredictable number of workers at each location, the demand for secret-key rates to support secure model transmission to the server is unpredictable. The proposed systems need to efficiently allocate limited QKD resources (i.e., wavelengths) such that the total cost is minimized in the presence of stochastic demand by formulating the optimization problem for the proposed architecture as a stochastic programming model. To this end, we propose a federated reinforcement learning-based resource allocation scheme to solve the proposed model without complete state information. The proposed scheme enables QKD managers and controllers to train a global QKD resource allocation policy while keeping their private experiences local. Numerical results demonstrate that the proposed schemes can successfully achieve the cost-minimizing objective under uncertain demand while improving the training efficiency by about 50\% compared to state-of-the-art schemes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.04308v1-abstract-full').style.display = 'none'; document.getElementById('2210.04308v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Xiong%2C+Z&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Xiong%2C+Z&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Xiong%2C+Z&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository