Search | arXiv e-print repository
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 156 results for author: <span class="mathjax">Yu, W</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/eess" aria-role="search"> Searching in archive <strong>eess</strong>. <a href="/search/?searchtype=author&query=Yu%2C+W">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Yu, W"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Yu%2C+W&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Yu, W"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
1. arXiv:2503.20290 [pdf, other]
   Categories: eess.AS, cs.AI, cs.CL, cs.SD
   Title: QualiSpeech: A Speech Quality Assessment Dataset with Natural Language Reasoning and Descriptions
   Authors: Siyin Wang, Wenyi Yu, Xianzhao Chen, Xiaohai Tian, Jun Zhang, Yu Tsao, Junichi Yamagishi, Yuxuan Wang, Chao Zhang
   Abstract: This paper explores a novel perspective to speech quality assessment by leveraging natural language descriptions, offering richer, more nuanced insights than traditional numerical scoring methods. Natural language feedback provides instructive recommendations and detailed evaluations, yet existing datasets lack the comprehensive annotations needed for this approach. To bridge this gap, we introduce QualiSpeech, a comprehensive low-level speech quality assessment dataset encompassing 11 key aspects and detailed natural language comments that include reasoning and contextual insights. Additionally, we propose the QualiSpeech Benchmark to evaluate the low-level speech understanding capabilities of auditory large language models (LLMs). Experimental results demonstrate that finetuned auditory LLMs can reliably generate detailed descriptions of noise and distortion, effectively identifying their types and temporal characteristics. The results further highlight the potential for incorporating reasoning to enhance the accuracy and reliability of quality assessments. The dataset will be released at https://huggingface.co/datasets/tsinghua-ee/QualiSpeech.
   Submitted: 26 March, 2025; originally announced March 2025.
   Comments: 23 pages, 16 figures

2. arXiv:2503.19046 [pdf, other]
   Categories: eess.SP, cs.IT, cs.LG
   Title: Learning Beamforming Codebooks for Active Sensing with Reconfigurable Intelligent Surface
   Authors: Zhongze Zhang, Wei Yu
   Abstract: This paper explores the design of beamforming codebooks for the base station (BS) and for the reconfigurable intelligent surfaces (RISs) in an active sensing scheme for uplink localization, in which the mobile user transmits a sequence of pilots to the BS through reflection at the RISs, and the BS and the RISs are adaptively configured by carefully choosing BS beamforming codewords and RIS codewords from their respective codebooks in a sequential manner to progressively focus onto the user. Most existing codebook designs for RIS are not tailored for active sensing, by which we mean the choice of the next codeword should depend on the measurements made so far, and the sequence of codewords should dynamically focus reflection toward the user. Moreover, most existing codeword selection methods rely on exhaustive search in beam training to identify the codeword with the highest signal-to-noise ratio (SNR), thus incurring substantial pilot overhead as the size of the codebook scales. This paper proposes learning-based approaches for codebook construction and for codeword selection for active sensing. The proposed learning approach aims to locate a target in the service area by recursively selecting a sequence of BS beamforming codewords and RIS codewords from the respective codebooks as more measurements become available without exhaustive beam training. The codebook design and the codeword selection fuse key ideas from the vector quantized-variational autoencoder (VQ-VAE) and the long short-term memory (LSTM) network to learn respectively the discrete function space of the codebook and the temporal dependencies between measurements.
   Submitted: 24 March, 2025; originally announced March 2025.
   Comments: Accepted in IEEE Transactions on Wireless Communications

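For readers unfamiliar with the combination sketched in the abstract above, the toy PyTorch snippet below illustrates the general idea of an LSTM summarizing past measurements and a VQ-style nearest-codeword lookup in a learned codebook. All dimensions, module names, and the toy measurement function are invented for illustration; this is not the paper's model or training procedure.

```python
# Toy "active sensing" loop: an LSTM state summarizes measurements so far and a
# query vector is quantized to the nearest entry of a learned codebook.
import torch
import torch.nn as nn

MEAS_DIM, HIDDEN, CB_SIZE, CW_DIM, T = 4, 64, 16, 8, 5   # invented sizes

class ActiveCodewordSelector(nn.Module):
    def __init__(self):
        super().__init__()
        self.rnn = nn.LSTMCell(MEAS_DIM, HIDDEN)           # temporal dependencies
        self.to_query = nn.Linear(HIDDEN, CW_DIM)           # hidden state -> query
        self.codebook = nn.Parameter(torch.randn(CB_SIZE, CW_DIM))  # learned codebook

    def select(self, query):
        # VQ-style hard assignment: pick the codeword closest to the query vector.
        dist = ((query.unsqueeze(1) - self.codebook.unsqueeze(0)) ** 2).sum(-1)
        idx = dist.argmin(dim=-1)
        return self.codebook[idx], idx

    def forward(self, measure_fn, batch=2):
        h = torch.zeros(batch, HIDDEN)
        c = torch.zeros(batch, HIDDEN)
        y = torch.zeros(batch, MEAS_DIM)   # no measurement before the first pilot
        chosen = []
        for _ in range(T):
            h, c = self.rnn(y, (h, c))     # update the summary of measurements so far
            codeword, idx = self.select(self.to_query(h))
            y = measure_fn(codeword)       # new measurement under the chosen codeword
            chosen.append(idx)
        return torch.stack(chosen, dim=1)  # indices of the T selected codewords

# Toy stand-in for the physics; in the paper's setting this would be the pilot
# received through the RIS-reflected channel under the chosen configuration.
toy_measure = lambda w: w[:, :MEAS_DIM] + 0.01 * torch.randn(w.shape[0], MEAS_DIM)
print(ActiveCodewordSelector()(toy_measure))
```

In the paper the codebook and the selection network would be trained jointly from data; none of that training loop is shown here.
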
3. arXiv:2503.14573 [pdf]
   Categories: eess.IV, cs.CV, cs.GR
   Title: Three-dimensional Reconstruction of the Lumbar Spine with Submillimeter Accuracy Using Biplanar X-ray Images
   Authors: Wanxin Yu, Zhemin Zhu, Cong Wang, Yihang Bao, Chunjie Xia, Rongshan Cheng, Yan Yu, Tsung-Yuan Tsai
   Abstract: Three-dimensional reconstruction of the spine under weight-bearing conditions from biplanar X-ray images is of great importance for the clinical assessment of spinal diseases. However, the current fully automated reconstruction methods have low accuracy and fail to meet the clinical application standards. This study developed and validated a fully automated method for high-accuracy 3D reconstruction of the lumbar spine from biplanar X-ray images. The method involves lumbar decomposition and landmark detection from the raw X-ray images, followed by a deformable model and landmark-weighted 2D-3D registration approach. The reconstruction accuracy was validated by the gold standard obtained through the registration of CT-segmented vertebral models with the biplanar X-ray images. The proposed method achieved a 3D reconstruction accuracy of 0.80 mm, representing a significant improvement over the mainstream approaches. This study will contribute to the clinical diagnosis of lumbar in weight-bearing positions.
   Submitted: 18 March, 2025; originally announced March 2025.
   Comments: 21 pages, 10 figures, 4 tables

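As a rough illustration of what a landmark-weighted 2D-3D registration objective looks like in general, the sketch below fits a rigid pose so that weighted, projected model landmarks match detected landmarks in two views. The projection model, weights, and landmark data are invented placeholders, and the rigid-only formulation omits the deformable vertebral model used in the paper.

```python
# Generic landmark-weighted 2D-3D registration over two synthetic "biplanar" views.
import numpy as np
from scipy.optimize import least_squares

def rigid(params, pts):
    """Rotate (rx, ry, rz) and translate (tx, ty, tz) a set of 3D points."""
    rx, ry, rz, tx, ty, tz = params
    Rx = np.array([[1, 0, 0], [0, np.cos(rx), -np.sin(rx)], [0, np.sin(rx), np.cos(rx)]])
    Ry = np.array([[np.cos(ry), 0, np.sin(ry)], [0, 1, 0], [-np.sin(ry), 0, np.cos(ry)]])
    Rz = np.array([[np.cos(rz), -np.sin(rz), 0], [np.sin(rz), np.cos(rz), 0], [0, 0, 1]])
    return pts @ (Rz @ Ry @ Rx).T + np.array([tx, ty, tz])

# Toy biplanar projections: frontal view keeps (x, z), lateral view keeps (y, z).
project = {"ap": lambda p: p[:, [0, 2]], "lat": lambda p: p[:, [1, 2]]}

def residuals(params, model_pts, detected, weights):
    moved = rigid(params, model_pts)
    res = []
    for view, proj in project.items():
        # landmark-weighted 2D reprojection error in each view
        res.append((weights[:, None] * (proj(moved) - detected[view])).ravel())
    return np.concatenate(res)

rng = np.random.default_rng(0)
model_pts = rng.normal(size=(6, 3))                    # landmarks on a vertebral model
true_pose = np.array([0.05, -0.02, 0.03, 1.0, -0.5, 0.2])
gt = rigid(true_pose, model_pts)
detected = {v: p(gt) + 0.01 * rng.normal(size=(6, 2)) for v, p in project.items()}
weights = np.array([1.0, 1.0, 0.5, 2.0, 1.0, 0.8])     # higher weight = more reliable landmark

fit = least_squares(residuals, x0=np.zeros(6), args=(model_pts, detected, weights))
print("recovered pose:", np.round(fit.x, 3))
```
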
4. arXiv:2503.10147 [pdf, other]
   Categories: eess.SP
   Title: Optimal Privacy-Preserving Distributed Median Consensus
   Authors: Wenrui Yu, Qiongxiu Li, Richard Heusdens, Sokol Kosta
   Abstract: Distributed median consensus has emerged as a critical paradigm in multi-agent systems due to the inherent robustness of the median against outliers and anomalies in measurement. Despite the sensitivity of the data involved, the development of privacy-preserving mechanisms for median consensus remains underexplored. In this work, we present the first rigorous analysis of privacy in distributed median consensus, focusing on an $L_1$-norm minimization framework. We establish necessary and sufficient conditions under which exact consensus and perfect privacy (defined as zero information leakage) can be achieved simultaneously. Our information-theoretic analysis provides provable guarantees against passive and eavesdropping adversaries, ensuring that private data remain concealed. Extensive numerical experiments validate our theoretical results, demonstrating the practical feasibility of achieving both accuracy and privacy in distributed median consensus.
   Submitted: 13 March, 2025; originally announced March 2025.

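The $L_1$-norm framework referred to in the abstract above builds on a standard identity: the median of local values $a_i$ is a minimizer of $\sum_i |x - a_i|$. The snippet below only checks this centralized identity numerically; the paper's distributed, privacy-preserving solver is not reproduced here.

```python
# The median of a_1..a_n minimizes the sum of absolute deviations (L1 objective).
import numpy as np
from scipy.optimize import minimize_scalar

a = np.array([3.0, -1.0, 7.0, 2.0, 100.0])   # local measurements, one per agent (with an outlier)

l1_objective = lambda x: np.abs(x - a).sum()
fit = minimize_scalar(l1_objective, bounds=(a.min(), a.max()), method="bounded")

print("np.median          :", np.median(a))     # 3.0
print("argmin sum |x-a_i| :", round(fit.x, 3))  # ~3.0, unaffected by the outlier 100
```
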
5. arXiv:2503.09922 [pdf, ps, other]
   Categories: eess.SP
   Title: RIS-Assisted Joint Sensing and Communications via Fractionally Constrained Fractional Programming
   Authors: Yiming Liu, Kareem M. Attiah, Wei Yu
   Abstract: This paper studies an uplink dual-functional sensing and communication system aided by a reconfigurable intelligent surface (RIS), whose reflection pattern is optimally configured to trade off sensing and communication functionalities. Specifically, the Bayesian Cramér-Rao lower bound (BCRLB) for estimating the azimuth angle of a sensing user is minimized while ensuring the signal-to-interference-plus-noise ratio constraints for communication users. We show that this problem can be formulated as a novel fractionally constrained fractional programming (FCFP) problem. To deal with this highly nontrivial problem, we extend a quadratic transform technique, originally proposed to handle optimization problems containing ratio structures only in objectives, to the scenario where the constraints also contain ratio structures. First, we consider the case where the fading coefficient is known. Using the quadratic transform, the FCFP problem is turned into a sequence of subproblems that are convex except for the constant-modulus constraints, which can be tackled using a penalty-based method. To further reduce the computational complexity, we leverage the constant-modulus conditions and propose a novel linear transform. This new transform enables the FCFP problem to be turned into a sequence of linear programming (LP) subproblems, which can be solved with linear complexity in the dimension of reflecting elements. Then, we consider the case where the fading coefficient is unknown. A modified BCRLB is used to make the problem more tractable, and the proposed quadratic transform-based algorithm is used to solve the problem. Finally, numerical results unveil nontrivial and effective reflection patterns that the RIS can be configured to generate to facilitate both functionalities.
   Submitted: 12 March, 2025; originally announced March 2025.
   Comments: The paper has been submitted to IEEE Transactions on Wireless Communications for review and possible publication

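For context, the quadratic transform mentioned in the abstract above is, in its standard single-ratio form, a reformulation of a ratio $A(x)/B(x)$ (with $A(x) \ge 0$, $B(x) > 0$) appearing in a maximization objective; the paper's contribution of handling ratios inside constraints is not shown here.

```latex
% Standard single-ratio quadratic transform (objective form), A(x) >= 0, B(x) > 0:
\max_{x}\ \frac{A(x)}{B(x)}
\quad\Longleftrightarrow\quad
\max_{x,\,y}\ 2y\sqrt{A(x)} - y^{2}B(x),
\qquad
y^{\star}=\frac{\sqrt{A(x)}}{B(x)},\quad
2y^{\star}\sqrt{A(x)}-(y^{\star})^{2}B(x)=\frac{A(x)}{B(x)}.
```

For fixed $x$ the inner maximization over the auxiliary variable $y$ recovers the original ratio, so the two problems share the same optimal $x$, while the reformulated objective no longer contains a fraction.
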
6. arXiv:2503.00941 [pdf, other]
   Categories: eess.SP
   Title: C2S-AE: CSI to Sensing enabled by an Auto-Encoder-based Framework
   Authors: Jun Jiang, Shugong Xu, Wenjun Yu, Yuan Gao
   Abstract: Next-generation mobile networks are set to utilize integrated sensing and communication (ISAC) as a critical technology, providing significant support for sectors like the industrial Internet of Things (IIoT), extended reality (XR), and smart home applications. A key challenge in ISAC implementation is the extraction of sensing parameters from radio signals, a task that conventional methods struggle to achieve due to the complexity of acquiring sensing channel data. In this paper, we introduce a novel auto-encoder (AE)-based framework to acquire sensing information using channel state information (CSI). Specifically, our framework, termed C2S (CSI to sensing)-AE, learns the relationship between CSI and the delay power spectrum (DPS), from which the range information can be readily accessed. To validate our framework's performance, we conducted measurements of DPS and CSI in real-world scenarios and introduced the dataset 'SHU7'. Our extensive experiments demonstrate that the framework excels in C2S extrapolation, surpassing existing methods in terms of accuracy for both delay and signal strength of individual paths. This innovative approach holds the potential to greatly enhance sensing capabilities in future mobile networks, paving the way for more robust and versatile ISAC applications.
   Submitted: 2 March, 2025; originally announced March 2025.

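A generic encoder-decoder of the kind the abstract above describes, mapping a CSI vector to a delay power spectrum, might look as follows. Layer sizes, the loss, and the random stand-in data are placeholders rather than the C2S-AE design.

```python
# Toy CSI -> delay power spectrum mapping with an encoder-decoder (auto-encoder style).
import torch
import torch.nn as nn

N_SUBCARRIERS, N_DELAY_BINS, LATENT = 64, 32, 16   # invented sizes

class CsiToDps(nn.Module):
    def __init__(self):
        super().__init__()
        # encoder compresses the (real/imag stacked) CSI into a latent code
        self.encoder = nn.Sequential(
            nn.Linear(2 * N_SUBCARRIERS, 128), nn.ReLU(),
            nn.Linear(128, LATENT),
        )
        # decoder expands the latent code into a non-negative delay power spectrum
        self.decoder = nn.Sequential(
            nn.Linear(LATENT, 128), nn.ReLU(),
            nn.Linear(128, N_DELAY_BINS), nn.Softplus(),
        )

    def forward(self, csi):
        return self.decoder(self.encoder(csi))

model = CsiToDps()
csi_batch = torch.randn(8, 2 * N_SUBCARRIERS)    # stand-in measured CSI
dps_target = torch.rand(8, N_DELAY_BINS)         # stand-in measured DPS
loss = nn.functional.mse_loss(model(csi_batch), dps_target)
loss.backward()
print("toy reconstruction loss:", float(loss))
```
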
7. arXiv:2503.00920 [pdf, other]
   Categories: physics.med-ph, eess.SP
   Title: High-Q non-invasive Glucose Sensor using Microstrip Line Main Field and Split Ring Resonator
   Authors: Brandon Kaiheng Tay, Saumitra Kapoor, Wenwei Yu, Shao Ying Huang
   Abstract: A high-Q sensor integrating microstrip line (MLIN) main field and split ring resonators is presented for non-invasive glucose sensing. The proposed sensor combines the field-focusing effects of split ring resonators with the enhanced field-substrate interaction properties of the MLIN main field, using the reflection coefficient (S11) of an open-ended MLIN with the finger as the substrate and operating at 750 MHz and 1.5 GHz. The permittivity of blood inside the finger depends on the glucose concentration, which in turn affects the S11 of the system. Sensor geometry was optimized using Method-of-Moments simulation before the sensor was fabricated and validated on standard solutions of glucose concentrations between 0 to 126 mg/dL within the physiological range, and a human test subject. In both experiments, a near inverse-linear relationship between the S11 peak magnitude and the glucose concentration was observed, demonstrating the sensitivity of the proposed sensor for detecting changes in blood glucose concentration at physiological conditions.
   Submitted: 2 March, 2025; originally announced March 2025.

8. arXiv:2502.18766 [pdf, other]
   Categories: eess.SP
   Title: MTCA: Multi-Task Channel Analysis for Wireless Communication
   Authors: Jun Jiang, Wenjun Yu, Yuan Gao, Shugong Xu
   Abstract: In modern wireless communication systems, the effective processing of Channel State Information (CSI) is crucial for enhancing communication quality and reliability. However, current methods often handle different tasks in isolation, thereby neglecting the synergies among various tasks and leading to inadequate CSI feature extraction for subsequent analysis. To address these limitations, this paper introduces a novel Multi-Task Channel Analysis framework named MTCA, aimed at improving the performance of wireless communication and even sensing. MTCA is designed to handle four critical tasks, including channel prediction, antenna-domain channel extrapolation, channel identification, and scenario classification. Experiments conducted on a multi-scenario, multi-antenna dataset tailored for UAV-based communications demonstrate that the proposed MTCA exhibits superior comprehension of CSI, achieving enhanced performance across all evaluated tasks. Notably, MTCA reached 100% prediction accuracy in channel identification and scenario classification. Compared to the previous state-of-the-art methods, MTCA improved channel prediction performance by 20.1% and antenna-domain extrapolation performance by 54.5%.
   Submitted: 25 February, 2025; originally announced February 2025.

9. arXiv:2502.11965 [pdf, other]
   Categories: eess.SP, cs.AI
   Title: A MIMO Wireless Channel Foundation Model via CIR-CSI Consistency
   Authors: Jun Jiang, Wenjun Yu, Yunfan Li, Yuan Gao, Shugong Xu
   Abstract: In the field of artificial intelligence, self-supervised learning has demonstrated superior generalization capabilities by leveraging large-scale unlabeled datasets for pretraining, which is especially critical for wireless communication models to adapt to a variety of scenarios. This paper innovatively treats Channel State Information (CSI) and Channel Impulse Response (CIR) as naturally aligned multi-modal data and proposes the first MIMO wireless channel foundation model, named CSI-CLIP. By effectively capturing the joint representations of both CIR and CSI, CSI-CLIP exhibits remarkable adaptability across scenarios and robust feature extraction capabilities. Experimental results show that in the positioning task, CSI-CLIP reduces the mean error distance by 22%; in the beam management task, it increases accuracy by 1% compared to traditional supervised methods, as well as in the channel identification task. These improvements not only highlight the potential and value of CSI-CLIP in integrating sensing and communication but also demonstrate its significant advantages over existing techniques. Moreover, viewing CSI and CIR as multi-modal pairs and contrastive learning for wireless channel foundation models open up new research directions in the domain of MIMO wireless communications.
   Submitted: 1 March, 2025; v1 submitted 17 February, 2025; originally announced February 2025.
   Comments: 6 pages; accepted at ICMLCN 2025

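The "CLIP" in CSI-CLIP suggests a symmetric contrastive objective over aligned (CSI, CIR) pairs. A minimal sketch of such a loss, with toy encoders and invented dimensions (not the paper's architecture), is shown below.

```python
# CLIP-style symmetric contrastive loss: the i-th CSI and i-th CIR in a batch
# (from the same channel realization) form the positive pair.
import torch
import torch.nn as nn
import torch.nn.functional as F

CSI_DIM, CIR_DIM, EMB = 128, 64, 32   # invented sizes
csi_encoder = nn.Sequential(nn.Linear(CSI_DIM, 64), nn.ReLU(), nn.Linear(64, EMB))
cir_encoder = nn.Sequential(nn.Linear(CIR_DIM, 64), nn.ReLU(), nn.Linear(64, EMB))

def clip_style_loss(csi, cir, temperature=0.07):
    z_csi = F.normalize(csi_encoder(csi), dim=-1)
    z_cir = F.normalize(cir_encoder(cir), dim=-1)
    logits = z_csi @ z_cir.t() / temperature        # similarity of every CSI to every CIR
    labels = torch.arange(csi.shape[0])             # i-th CSI matches i-th CIR
    # symmetric cross-entropy over both matching directions (CSI->CIR and CIR->CSI)
    return 0.5 * (F.cross_entropy(logits, labels) + F.cross_entropy(logits.t(), labels))

batch = 16
loss = clip_style_loss(torch.randn(batch, CSI_DIM), torch.randn(batch, CIR_DIM))
loss.backward()
print("toy contrastive loss:", float(loss))
```
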
10. arXiv:2412.09839 [pdf, other]
   Categories: eess.SP, cs.IT
   Title: AI and Deep Learning for THz Ultra-Massive MIMO: From Model-Driven Approaches to Foundation Models
   Authors: Wentao Yu, Hengtao He, Shenghui Song, Jun Zhang, Linglong Dai, Lizhong Zheng, Khaled B. Letaief
   Abstract: In this paper, we explore the potential of artificial intelligence (AI) to address the challenges posed by terahertz ultra-massive multiple-input multiple-output (THz UM-MIMO) systems. We begin by outlining the characteristics of THz UM-MIMO systems, and identify three primary challenges for the transceiver design: 'hard to compute', 'hard to model', and 'hard to measure'. We argue that AI can provide a promising solution to these challenges. We then propose two systematic research roadmaps for developing AI algorithms tailored for THz UM-MIMO systems. The first roadmap, called model-driven deep learning (DL), emphasizes the importance of leveraging available domain knowledge and advocates adopting AI only to enhance the bottleneck modules within an established signal processing or optimization framework. We discuss four essential steps to make it work, including algorithmic frameworks, basis algorithms, loss function design, and neural architecture design. Afterwards, we present a forward-looking vision through the second roadmap, i.e., physical layer foundation models. This approach seeks to unify the design of different transceiver modules by focusing on their common foundation, i.e., the wireless channel. We propose to train a single, compact foundation model to estimate the score function of wireless channels, which can serve as a versatile prior for designing a wide variety of transceiver modules. We will also guide the readers through four essential steps, including general frameworks, conditioning, site-specific adaptation, and the joint design of foundation models and model-driven DL.
   Submitted: 12 December, 2024; originally announced December 2024.
   Comments: 20 pages, 8 figures, tutorial paper. Physical layer foundation models and model-driven deep learning are presented as two systematic research roadmaps for AI-enabled THz ultra-massive MIMO systems.

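One common way to train a network that estimates the score function of a distribution, as envisioned in the second roadmap above, is denoising score matching. The toy sketch below shows that objective with an invented network, noise level, and stand-in channel samples; it is not the tutorial's recipe.

```python
# Denoising score matching: regress s_theta(h_noisy, sigma) onto the score of the
# Gaussian perturbation kernel, grad log q(h_noisy | h) = -(h_noisy - h) / sigma^2.
import torch
import torch.nn as nn

CH_DIM = 64   # invented channel-vector dimension
score_net = nn.Sequential(nn.Linear(CH_DIM + 1, 128), nn.ReLU(), nn.Linear(128, CH_DIM))

def dsm_loss(h, sigma):
    noise = torch.randn_like(h)
    h_noisy = h + sigma * noise
    inp = torch.cat([h_noisy, sigma.expand(h.shape[0], 1)], dim=-1)  # condition on sigma
    target = -noise / sigma            # = -(h_noisy - h) / sigma^2
    return ((score_net(inp) - target) ** 2).mean()

channels = torch.randn(32, CH_DIM)     # stand-in channel samples
loss = dsm_loss(channels, sigma=torch.tensor([0.1]))
loss.backward()
print("toy DSM loss:", float(loss))
```
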
11. arXiv:2412.08504 [pdf, other]
   Categories: cs.SD, cs.AI, cs.GR, cs.MM, eess.AS
   Title: PointTalk: Audio-Driven Dynamic Lip Point Cloud for 3D Gaussian-based Talking Head Synthesis
   Authors: Yifan Xie, Tao Feng, Xin Zhang, Xiangyang Luo, Zixuan Guo, Weijiang Yu, Heng Chang, Fei Ma, Fei Richard Yu
   Abstract: Talking head synthesis with arbitrary speech audio is a crucial challenge in the field of digital humans. Recently, methods based on radiance fields have received increasing attention due to their ability to synthesize high-fidelity and identity-consistent talking heads from just a few minutes of training video. However, due to the limited scale of the training data, these methods often exhibit poor performance in audio-lip synchronization and visual quality. In this paper, we propose a novel 3D Gaussian-based method called PointTalk, which constructs a static 3D Gaussian field of the head and deforms it in sync with the audio. It also incorporates an audio-driven dynamic lip point cloud as a critical component of the conditional information, thereby facilitating the effective synthesis of talking heads. Specifically, the initial step involves generating the corresponding lip point cloud from the audio signal and capturing its topological structure. The design of the dynamic difference encoder aims to capture the subtle nuances inherent in dynamic lip movements more effectively. Furthermore, we integrate the audio-point enhancement module, which not only ensures the synchronization of the audio signal with the corresponding lip point cloud within the feature space, but also facilitates a deeper understanding of the interrelations among cross-modal conditional features. Extensive experiments demonstrate that our method achieves superior high-fidelity and audio-lip synchronization in talking head synthesis compared to previous methods.
   Submitted: 11 December, 2024; originally announced December 2024.
   Comments: 9 pages, accepted by AAAI 2025

framework for addressing diverse speech understanding and generation tasks, enabling more natural and seamless human-machine conversations. Unlike traditional modularised conversational AI systems, which separate speech recognition, understanding, and text-to-speech generation into distinct components, multimodal LLMs operate as… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.18138v1-abstract-full').style.display = 'inline'; document.getElementById('2411.18138v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.18138v1-abstract-full" style="display: none;"> Full-duplex multimodal large language models (LLMs) provide a unified framework for addressing diverse speech understanding and generation tasks, enabling more natural and seamless human-machine conversations. Unlike traditional modularised conversational AI systems, which separate speech recognition, understanding, and text-to-speech generation into distinct components, multimodal LLMs operate as single end-to-end models. This streamlined design eliminates error propagation across components and fully leverages the rich non-verbal information embedded in input speech signals. We introduce SALMONN-omni, a codec-free, full-duplex speech understanding and generation model capable of simultaneously listening to its own generated speech and background sounds while speaking. To support this capability, we propose a novel duplex spoken dialogue framework incorporating a ``thinking'' mechanism that facilitates asynchronous text and speech generation relying on embeddings instead of codecs (quantized speech and audio tokens). Experimental results demonstrate SALMONN-omni's versatility across a broad range of streaming speech tasks, including speech recognition, speech enhancement, and spoken question answering. Additionally, SALMONN-omni excels at managing turn-taking, barge-in, and echo cancellation scenarios, establishing its potential as a robust prototype for full-duplex conversational AI systems. To the best of our knowledge, SALMONN-omni is the first codec-free model of its kind. A full technical report along with model checkpoints will be released soon. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.18138v1-abstract-full').style.display = 'none'; document.getElementById('2411.18138v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Technical report</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10775">arXiv:2411.10775</a> <span> [<a href="https://arxiv.org/pdf/2411.10775">pdf</a>, <a href="https://arxiv.org/format/2411.10775">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> </div> <p class="title is-5 mathjax"> Beyond Feature Mapping GAP: Integrating Real HDRTV Priors for Superior SDRTV-to-HDRTV Conversion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xu%2C+K">Kepeng Xu</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+L">Li Xu</a>, <a href="/search/eess?searchtype=author&query=He%2C+G">Gang He</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Z">Zhiqiang Zhang</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+W">Wenxin Yu</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+S">Shihao Wang</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+D">Dajiang Zhou</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yunsong Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10775v1-abstract-short" style="display: inline;"> The rise of HDR-WCG display devices has highlighted the need to convert SDRTV to HDRTV, as most video sources are still in SDR. Existing methods primarily focus on designing neural networks to learn a single-style mapping from SDRTV to HDRTV. However, the limited information in SDRTV and the diversity of styles in real-world conversions render this process an ill-posed problem, thereby constrainin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10775v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10775v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10775v1-abstract-full" style="display: none;"> The rise of HDR-WCG display devices has highlighted the need to convert SDRTV to HDRTV, as most video sources are still in SDR. Existing methods primarily focus on designing neural networks to learn a single-style mapping from SDRTV to HDRTV. However, the limited information in SDRTV and the diversity of styles in real-world conversions render this process an ill-posed problem, thereby constraining the performance and generalization of these methods. Inspired by generative approaches, we propose a novel method for SDRTV to HDRTV conversion guided by real HDRTV priors. Despite the limited information in SDRTV, introducing real HDRTV as reference priors significantly constrains the solution space of the originally high-dimensional ill-posed problem. This shift transforms the task from solving an unreferenced prediction problem to making a referenced selection, thereby markedly enhancing the accuracy and reliability of the conversion process. 
Specifically, our approach comprises two stages: the first stage employs a Vector Quantized Generative Adversarial Network to capture HDRTV priors, while the second stage matches these priors to the input SDRTV content to recover realistic HDRTV outputs. We evaluate our method on public datasets, demonstrating its effectiveness with significant improvements in both objective and subjective metrics across real and synthetic datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10775v1-abstract-full').style.display = 'none'; document.getElementById('2411.10775v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10773">arXiv:2411.10773</a> <span> [<a href="https://arxiv.org/pdf/2411.10773">pdf</a>, <a href="https://arxiv.org/format/2411.10773">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> An End-to-End Real-World Camera Imaging Pipeline </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xu%2C+K">Kepeng Xu</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+Z">Zijia Ma</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+L">Li Xu</a>, <a href="/search/eess?searchtype=author&query=He%2C+G">Gang He</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yunsong Li</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+W">Wenxin Yu</a>, <a href="/search/eess?searchtype=author&query=Han%2C+T">Taichu Han</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+C">Cheng Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10773v1-abstract-short" style="display: inline;"> Recent advances in neural camera imaging pipelines have demonstrated notable progress. Nevertheless, the real-world imaging pipeline still faces challenges including the lack of joint optimization in system components, computational redundancies, and optical distortions such as lens shading. In light of this, we propose an end-to-end camera imaging pipeline (RealCamNet) to enhance real-world camera… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10773v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10773v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10773v1-abstract-full" style="display: none;"> Recent advances in neural camera imaging pipelines have demonstrated notable progress.
Nevertheless, the real-world imaging pipeline still faces challenges including the lack of joint optimization in system components, computational redundancies, and optical distortions such as lens shading. In light of this, we propose an end-to-end camera imaging pipeline (RealCamNet) to enhance real-world camera imaging performance. Our methodology diverges from conventional, fragmented multi-stage image signal processing towards an end-to-end architecture. This architecture facilitates joint optimization across the full pipeline and the restoration of coordinate-biased distortions. RealCamNet is designed for high-quality conversion from RAW to RGB and compact image compression. Specifically, we deeply analyze coordinate-dependent optical distortions, e.g., vignetting and dark shading, and design a novel Coordinate-Aware Distortion Restoration (CADR) module to restore coordinate-biased distortions. Furthermore, we propose a Coordinate-Independent Mapping Compression (CIMC) module to implement tone mapping and redundant information compression. Existing datasets suffer from misalignment and overly idealized conditions, making them inadequate for training real-world imaging pipelines. Therefore, we collected a real-world imaging dataset. Experiment results show that RealCamNet achieves the best rate-distortion performance with lower inference latency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10773v1-abstract-full').style.display = 'none'; document.getElementById('2411.10773v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accept by ACMMM 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.16644">arXiv:2409.16644</a> <span> [<a href="https://arxiv.org/pdf/2409.16644">pdf</a>, <a href="https://arxiv.org/format/2409.16644">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Enabling Auditory Large Language Models for Automatic Speech Quality Evaluation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+S">Siyin Wang</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+W">Wenyi Yu</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+Y">Yudong Yang</a>, <a href="/search/eess?searchtype=author&query=Tang%2C+C">Changli Tang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yixuan Li</a>, <a href="/search/eess?searchtype=author&query=Zhuang%2C+J">Jimin Zhuang</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+X">Xianzhao Chen</a>, <a href="/search/eess?searchtype=author&query=Tian%2C+X">Xiaohai Tian</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+J">Jun Zhang</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+G">Guangzhi Sun</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+L">Lu Lu</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+C">Chao Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.16644v2-abstract-short" style="display: inline;"> Speech quality assessment typically requires evaluating audio from multiple aspects, such as mean opinion score (MOS) and speaker similarity (SIM) \etc., which can be challenging to cover using one small model designed for a single task. In this paper, we propose leveraging recently introduced auditory large language models (LLMs) for automatic speech quality assessment. By employing task-specific… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.16644v2-abstract-full').style.display = 'inline'; document.getElementById('2409.16644v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.16644v2-abstract-full" style="display: none;"> Speech quality assessment typically requires evaluating audio from multiple aspects, such as mean opinion score (MOS) and speaker similarity (SIM) \etc., which can be challenging to cover using one small model designed for a single task. In this paper, we propose leveraging recently introduced auditory large language models (LLMs) for automatic speech quality assessment. By employing task-specific prompts, auditory LLMs are finetuned to predict MOS, SIM and A/B testing results, which are commonly used for evaluating text-to-speech systems. 
Additionally, the finetuned auditory LLM is able to generate natural language descriptions assessing aspects like noisiness, distortion, discontinuity, and overall quality, providing more interpretable outputs. Extensive experiments have been performed on the NISQA, BVCC, SOMOS and VoxSim speech quality datasets, using open-source auditory LLMs such as SALMONN, Qwen-Audio, and Qwen2-Audio. For the natural language descriptions task, a commercial model Google Gemini 1.5 Pro is also evaluated. The results demonstrate that auditory LLMs achieve competitive performance compared to state-of-the-art task-specific small models in predicting MOS and SIM, while also delivering promising results in A/B testing and natural language descriptions. Our data processing scripts and finetuned model checkpoints can be found at https://github.com/bytedance/SALMONN. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.16644v2-abstract-full').style.display = 'none'; document.getElementById('2409.16644v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ICASSP 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.10226">arXiv:2409.10226</a> <span> [<a href="https://arxiv.org/pdf/2409.10226">pdf</a>, <a href="https://arxiv.org/format/2409.10226">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Privacy-Preserving Distributed Maximum Consensus Without Accuracy Loss </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Yu%2C+W">Wenrui Yu</a>, <a href="/search/eess?searchtype=author&query=Heusdens%2C+R">Richard Heusdens</a>, <a href="/search/eess?searchtype=author&query=Pang%2C+J">Jun Pang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Q">Qiongxiu Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.10226v1-abstract-short" style="display: inline;"> In distributed networks, calculating the maximum element is a fundamental task in data analysis, known as the distributed maximum consensus problem. However, the sensitive nature of the data involved makes privacy protection essential. Despite its importance, privacy in distributed maximum consensus has received limited attention in the literature. 
Traditional privacy-preserving methods typically… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.10226v1-abstract-full').style.display = 'inline'; document.getElementById('2409.10226v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.10226v1-abstract-full" style="display: none;"> In distributed networks, calculating the maximum element is a fundamental task in data analysis, known as the distributed maximum consensus problem. However, the sensitive nature of the data involved makes privacy protection essential. Despite its importance, privacy in distributed maximum consensus has received limited attention in the literature. Traditional privacy-preserving methods typically add noise to updates, degrading the accuracy of the final result. To overcome these limitations, we propose a novel distributed optimization-based approach that preserves privacy without sacrificing accuracy. Our method introduces virtual nodes to form an augmented graph and leverages a carefully designed initialization process to ensure the privacy of honest participants, even when all their neighboring nodes are dishonest. Through a comprehensive information-theoretical analysis, we derive a sufficient condition to protect private data against both passive and eavesdropping adversaries. Extensive experiments validate the effectiveness of our approach, demonstrating that it not only preserves perfect privacy but also maintains accuracy, outperforming existing noise-based methods that typically suffer from accuracy loss. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.10226v1-abstract-full').style.display = 'none'; document.getElementById('2409.10226v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.09642">arXiv:2409.09642</a> <span> [<a href="https://arxiv.org/pdf/2409.09642">pdf</a>, <a href="https://arxiv.org/format/2409.09642">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Extract and Diffuse: Latent Integration for Improved Diffusion-based Speech and Vocal Enhancement </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Yang%2C+Y">Yudong Yang</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Z">Zhan Liu</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+W">Wenyi Yu</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+G">Guangzhi Sun</a>, <a href="/search/eess?searchtype=author&query=Kong%2C+Q">Qiuqiang Kong</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+C">Chao Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.09642v1-abstract-short" style="display: inline;"> Diffusion-based generative models have recently achieved remarkable results in speech and vocal enhancement due to their ability to model complex speech data distributions. While these models generalize well to unseen acoustic environments, they may not achieve the same level of fidelity as the discriminative models specifically trained to enhance particular acoustic conditions. In this paper, we… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.09642v1-abstract-full').style.display = 'inline'; document.getElementById('2409.09642v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.09642v1-abstract-full" style="display: none;"> Diffusion-based generative models have recently achieved remarkable results in speech and vocal enhancement due to their ability to model complex speech data distributions. While these models generalize well to unseen acoustic environments, they may not achieve the same level of fidelity as the discriminative models specifically trained to enhance particular acoustic conditions. In this paper, we propose Ex-Diff, a novel score-based diffusion model that integrates the latent representations produced by a discriminative model to improve speech and vocal enhancement, which combines the strengths of both generative and discriminative models. Experimental results on the widely used MUSDB dataset show relative improvements of 3.7% in SI-SDR and 10.0% in SI-SIR compared to the baseline diffusion model for speech and vocal enhancement tasks, respectively. Additionally, case studies are provided to further illustrate and analyze the complementary nature of generative and discriminative models in this context. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.09642v1-abstract-full').style.display = 'none'; document.getElementById('2409.09642v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.04380">arXiv:2409.04380</a> <span> [<a href="https://arxiv.org/pdf/2409.04380">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optics">physics.optics</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> A MEMS-based terahertz broadband beam steering technique </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Yu%2C+W">Weihua Yu</a>, <a href="/search/eess?searchtype=author&query=Peng%2C+H">Hong Peng</a>, <a href="/search/eess?searchtype=author&query=Li%2C+M">Mingze Li</a>, <a href="/search/eess?searchtype=author&query=Li%2C+H">Haolin Li</a>, <a href="/search/eess?searchtype=author&query=Xue%2C+Y">Yuan Xue</a>, <a href="/search/eess?searchtype=author&query=Xie%2C+H">Huikai Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.04380v1-abstract-short" style="display: inline;"> A multi-level tunable reflection array wide-angle beam scanning method is proposed to address the limited bandwidth and small scanning angle issues of current terahertz beam scanning technology. In this method, a focusing lens and its array are used to achieve terahertz wave spatial beam control, and MEMS mirrors and their arrays are used to achieve wide-angle beam scanning. The 1~3 order terahert… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.04380v1-abstract-full').style.display = 'inline'; document.getElementById('2409.04380v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.04380v1-abstract-full" style="display: none;"> A multi-level tunable reflection array wide-angle beam scanning method is proposed to address the limited bandwidth and small scanning angle issues of current terahertz beam scanning technology. In this method, a focusing lens and its array are used to achieve terahertz wave spatial beam control, and MEMS mirrors and their arrays are used to achieve wide-angle beam scanning. The 1~3 order terahertz MEMS beam scanning system designed based on this method can extend the mechanical scanning angle of MEMS mirrors by 2~6 times, when tested and verified using an electromagnetic MEMS mirror with a 7mm optical aperture and a scanning angle of 15掳 and a D-band terahertz signal source. The experiment shows that the operating bandwidth of the first-order terahertz MEMS beam scanning system is better than 40GHz, the continuous beam scanning angle is about 30掳, the continuous beam scanning cycle response time is about 1.1ms, and the antenna gain is better than 15dBi at 160GHz. 
This method has been validated for its large bandwidth and scalable scanning angle, and has potential application prospects in terahertz dynamic communication, detection radar, scanning imaging, and other fields. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.04380v1-abstract-full').style.display = 'none'; document.getElementById('2409.04380v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.19627">arXiv:2406.19627</a> <span> [<a href="https://arxiv.org/pdf/2406.19627">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Practical Power System Inertia Monitoring Based on Pumped Storage Hydropower Operation Signature </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+H">Hongyu Li</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+C">Chang Chen</a>, <a href="/search/eess?searchtype=author&query=Baldwin%2C+M">Mark Baldwin</a>, <a href="/search/eess?searchtype=author&query=You%2C+S">Shutang You</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+W">Wenpeng Yu</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+L">Lin Zhu</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Y">Yilu Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.19627v2-abstract-short" style="display: inline;"> This paper proposes a practical method to monitor power system inertia using Pumped Storage Hydropower (PSH) switching-off events. This approach offers real-time system-level inertia estimation with minimal expenses, no disruption, and the inclusion of behind-the-meter inertia. First, accurate inertia estimation is achieved through improved RoCoF calculation that accounts for pre-event RoCoF, redu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19627v2-abstract-full').style.display = 'inline'; document.getElementById('2406.19627v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.19627v2-abstract-full" style="display: none;"> This paper proposes a practical method to monitor power system inertia using Pumped Storage Hydropower (PSH) switching-off events. This approach offers real-time system-level inertia estimation with minimal expenses, no disruption, and the inclusion of behind-the-meter inertia. First, accurate inertia estimation is achieved through improved RoCoF calculation that accounts for pre-event RoCoF, reducing common random frequency fluctuations in practice. Second, PSH field data is analyzed, highlighting the benefits of using switching-off events for grid inertia estimation. Third, an event detection trigger is designed to capture pump switching-off events based on local and system features. Fourth, the method is validated on the U.S. 
Eastern Interconnection model with over 60,000 buses, demonstrating very high accuracy (3%-5% error rate). Finally, it is applied to the U.S. Western Interconnection, with field validation showing a 9.9% average absolute error rate. Despite challenges in practical power system inertia estimation, this method enhances decision-making for power grid reliability and efficiency, addressing challenges posed by renewable energy integration. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19627v2-abstract-full').style.display = 'none'; document.getElementById('2406.19627v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 15 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.19483">arXiv:2406.19483</a> <span> [<a href="https://arxiv.org/pdf/2406.19483">pdf</a>, <a href="https://arxiv.org/ps/2406.19483">ps</a>, <a href="https://arxiv.org/format/2406.19483">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Localization in Multipath Environments via Active Sensing with Reconfigurable Intelligent Surfaces </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yinghan Li</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+W">Wei Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.19483v2-abstract-short" style="display: inline;"> This letter investigates an uplink pilot-based wireless indoor localization problem in a multipath environment for a single-input single-output (SISO) narrowband communication system aided by reconfigurable intelligent surface (RIS). The indoor localization problem is challenging because the uplink channel consists of multiple overlapping propagation paths with varying amplitudes and phases, which… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19483v2-abstract-full').style.display = 'inline'; document.getElementById('2406.19483v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.19483v2-abstract-full" style="display: none;"> This letter investigates an uplink pilot-based wireless indoor localization problem in a multipath environment for a single-input single-output (SISO) narrowband communication system aided by reconfigurable intelligent surface (RIS). The indoor localization problem is challenging because the uplink channel consists of multiple overlapping propagation paths with varying amplitudes and phases, which are not easy to differentiate. This letter proposes the use of RIS capable of adaptively changing its reflection pattern to sense such a multiple-path environment. 
Toward this end, we train a long-short-term-memory (LSTM) based controller to perform adaptive sequential reconfigurations of the RIS over multiple stages and propose to group multiple pilots as input in each stage. Information from the multiple paths is captured by training the LSTM to generate multiple RIS configurations to align to the different paths within each stage. Experimental results show that the proposed approach is effective in significantly reducing training complexity while maintaining localization performance at fixed number of pilots. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19483v2-abstract-full').style.display = 'none'; document.getElementById('2406.19483v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.18361">arXiv:2406.18361</a> <span> [<a href="https://arxiv.org/pdf/2406.18361">pdf</a>, <a href="https://arxiv.org/format/2406.18361">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Stable Diffusion Segmentation for Biomedical Images with Single-step Reverse Process </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Lin%2C+T">Tianyu Lin</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Z">Zhiguang Chen</a>, <a href="/search/eess?searchtype=author&query=Yan%2C+Z">Zhonghao Yan</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+W">Weijiang Yu</a>, <a href="/search/eess?searchtype=author&query=Zheng%2C+F">Fudan Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.18361v3-abstract-short" style="display: inline;"> Diffusion models have demonstrated their effectiveness across various generative tasks. However, when applied to medical image segmentation, these models encounter several challenges, including significant resource and time requirements. They also necessitate a multi-step reverse process and multiple samples to produce reliable predictions. To address these challenges, we introduce the first laten… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.18361v3-abstract-full').style.display = 'inline'; document.getElementById('2406.18361v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.18361v3-abstract-full" style="display: none;"> Diffusion models have demonstrated their effectiveness across various generative tasks. 
However, when applied to medical image segmentation, these models encounter several challenges, including significant resource and time requirements. They also necessitate a multi-step reverse process and multiple samples to produce reliable predictions. To address these challenges, we introduce the first latent diffusion segmentation model, named SDSeg, built upon stable diffusion (SD). SDSeg incorporates a straightforward latent estimation strategy to facilitate a single-step reverse process and utilizes latent fusion concatenation to remove the necessity for multiple samples. Extensive experiments indicate that SDSeg surpasses existing state-of-the-art methods on five benchmark datasets featuring diverse imaging modalities. Remarkably, SDSeg is capable of generating stable predictions with a solitary reverse step and sample, epitomizing the model's stability as implied by its name. The code is available at https://github.com/lin-tianyu/Stable-Diffusion-Seg <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.18361v3-abstract-full').style.display = 'none'; document.getElementById('2406.18361v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at MICCAI 2024. Code and citation info see https://github.com/lin-tianyu/Stable-Diffusion-Seg</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.07914">arXiv:2406.07914</a> <span> [<a href="https://arxiv.org/pdf/2406.07914">pdf</a>, <a href="https://arxiv.org/format/2406.07914">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Can Large Language Models Understand Spatial Audio? 
</p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Tang%2C+C">Changli Tang</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+W">Wenyi Yu</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+G">Guangzhi Sun</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+X">Xianzhao Chen</a>, <a href="/search/eess?searchtype=author&query=Tan%2C+T">Tian Tan</a>, <a href="/search/eess?searchtype=author&query=Li%2C+W">Wei Li</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+J">Jun Zhang</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+L">Lu Lu</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+Z">Zejun Ma</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yuxuan Wang</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+C">Chao Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.07914v2-abstract-short" style="display: inline;"> This paper explores enabling large language models (LLMs) to understand spatial information from multichannel audio, a skill currently lacking in auditory LLMs. By leveraging LLMs' advanced cognitive and inferential abilities, the aim is to enhance understanding of 3D environments via audio. We study 3 spatial audio tasks: sound source localization (SSL), far-field speech recognition (FSR), and lo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.07914v2-abstract-full').style.display = 'inline'; document.getElementById('2406.07914v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.07914v2-abstract-full" style="display: none;"> This paper explores enabling large language models (LLMs) to understand spatial information from multichannel audio, a skill currently lacking in auditory LLMs. By leveraging LLMs' advanced cognitive and inferential abilities, the aim is to enhance understanding of 3D environments via audio. We study 3 spatial audio tasks: sound source localization (SSL), far-field speech recognition (FSR), and localisation-informed speech extraction (LSE), achieving notable progress in each task. For SSL, our approach achieves an MAE of $2.70^{\circ}$ on the Spatial LibriSpeech dataset, substantially surpassing the prior benchmark of about $6.60^{\circ}$. Moreover, our model can employ spatial cues to improve FSR accuracy and execute LSE by selectively attending to sounds originating from a specified direction via text prompts, even amidst overlapping speech. These findings highlight the potential of adapting LLMs to grasp physical audio concepts, paving the way for LLM-based agents in 3D environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.07914v2-abstract-full').style.display = 'none'; document.getElementById('2406.07914v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at Interspeech 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.04737">arXiv:2406.04737</a> <span> [<a href="https://arxiv.org/pdf/2406.04737">pdf</a>, <a href="https://arxiv.org/format/2406.04737">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TWC.2024.3425473">10.1109/TWC.2024.3425473 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TWC.2024.3425473">10.1109/TWC.2024.3425473 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TWC.2024.3425473">10.1109/TWC.2024.3425473 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Fast-Fading Channel and Power Optimization of the Magnetic Inductive Cellular Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Ma%2C+H">Honglei Ma</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+E">Erwu Liu</a>, <a href="/search/eess?searchtype=author&query=Fang%2C+Z">Zhijun Fang</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+R">Rui Wang</a>, <a href="/search/eess?searchtype=author&query=Gao%2C+Y">Yongbin Gao</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+W">Wenjun Yu</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+D">Dongming Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.04737v2-abstract-short" style="display: inline;"> The cellular network of magnetic Induction (MI) communication holds promise in long-distance underground environments. In the traditional MI communication, there is no fast-fading channel since the MI channel is treated as a quasi-static channel. However, for the vehicle (mobile) MI (VMI) communication, the unpredictable antenna vibration brings the remarkable fast-fading. As such fast-fading cann… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.04737v2-abstract-full').style.display = 'inline'; document.getElementById('2406.04737v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.04737v2-abstract-full" style="display: none;"> The cellular network of magnetic Induction (MI) communication holds promise in long-distance underground environments. 
In the traditional MI communication, there is no fast-fading channel since the MI channel is treated as a quasi-static channel. However, for the vehicle (mobile) MI (VMI) communication, the unpredictable antenna vibration brings the remarkable fast-fading. As such fast-fading cannot be modeled by the central limit theorem, it differs radically from other wireless fast-fading channels. Unfortunately, few studies focus on this phenomenon. In this paper, using a novel space modeling based on the electromagnetic field theorem, we propose a 3-dimension model of the VMI antenna vibration. By proposing ``conjugate pseudo-piecewise functions'' and boundary $p(x)$ distribution, we derive the cumulative distribution function (CDF), probability density function (PDF) and the expectation of the VMI fast-fading channel. We also theoretically analyze the effects of the VMI fast-fading on the network throughput, including the VMI outage probability which can be ignored in the traditional MI channel study. We draw several intriguing conclusions different from those in wireless fast-fading studies. For instance, the fast-fading brings more uniformly distributed channel coefficients. Finally, we propose the power control algorithm using the non-cooperative game and multiagent Q-learning methods to optimize the throughput of the cellular VMI network. Simulations validate the derivation and the proposed algorithm. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.04737v2-abstract-full').style.display = 'none'; document.getElementById('2406.04737v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This work has been accepted by the IEEE TWC for publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.04867">arXiv:2405.04867</a> <span> [<a href="https://arxiv.org/pdf/2405.04867">pdf</a>, <a href="https://arxiv.org/format/2405.04867">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MIPI 2024 Challenge on Demosaic for HybridEVS Camera: Methods and Results </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wu%2C+Y">Yaqi Wu</a>, <a href="/search/eess?searchtype=author&query=Fan%2C+Z">Zhihao Fan</a>, <a href="/search/eess?searchtype=author&query=Chu%2C+X">Xiaofeng Chu</a>, <a href="/search/eess?searchtype=author&query=Ren%2C+J+S">Jimmy S. 
Ren</a>, <a href="/search/eess?searchtype=author&query=Li%2C+X">Xiaoming Li</a>, <a href="/search/eess?searchtype=author&query=Yue%2C+Z">Zongsheng Yue</a>, <a href="/search/eess?searchtype=author&query=Li%2C+C">Chongyi Li</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+S">Shangcheng Zhou</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+R">Ruicheng Feng</a>, <a href="/search/eess?searchtype=author&query=Dai%2C+Y">Yuekun Dai</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+P">Peiqing Yang</a>, <a href="/search/eess?searchtype=author&query=Loy%2C+C+C">Chen Change Loy</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+S">Senyan Xu</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+Z">Zhijing Sun</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+J">Jiaying Zhu</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+Y">Yurui Zhu</a>, <a href="/search/eess?searchtype=author&query=Fu%2C+X">Xueyang Fu</a>, <a href="/search/eess?searchtype=author&query=Zha%2C+Z">Zheng-Jun Zha</a>, <a href="/search/eess?searchtype=author&query=Cao%2C+J">Jun Cao</a>, <a href="/search/eess?searchtype=author&query=Li%2C+C">Cheng Li</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+S">Shu Chen</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+L">Liang Ma</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+S">Shiyang Zhou</a>, <a href="/search/eess?searchtype=author&query=Zeng%2C+H">Haijin Zeng</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+K">Kai Feng</a> , et al. (24 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.04867v1-abstract-short" style="display: inline;"> The increasing demand for computational photography and imaging on mobile platforms has led to the widespread development and integration of advanced image sensors with novel algorithms in camera systems. However, the scarcity of high-quality data for research and the rare opportunity for in-depth exchange of views from industry and academia constrain the development of mobile intelligent photogra… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.04867v1-abstract-full').style.display = 'inline'; document.getElementById('2405.04867v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.04867v1-abstract-full" style="display: none;"> The increasing demand for computational photography and imaging on mobile platforms has led to the widespread development and integration of advanced image sensors with novel algorithms in camera systems. However, the scarcity of high-quality data for research and the rare opportunity for in-depth exchange of views from industry and academia constrain the development of mobile intelligent photography and imaging (MIPI). Building on the achievements of the previous MIPI Workshops held at ECCV 2022 and CVPR 2023, we introduce our third MIPI challenge including three tracks focusing on novel image sensors and imaging algorithms. In this paper, we summarize and review the Nighttime Flare Removal track on MIPI 2024. In total, 170 participants were successfully registered, and 14 teams submitted results in the final testing phase. The developed solutions in this challenge achieved state-of-the-art performance on Nighttime Flare Removal. 
More details of this challenge and the link to the dataset can be found at https://mipi-challenge.org/MIPI2024/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.04867v1-abstract-full').style.display = 'none'; document.getElementById('2405.04867v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">MIPI@CVPR2024. Website: https://mipi-challenge.org/MIPI2024/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.03129">arXiv:2405.03129</a> <span> [<a href="https://arxiv.org/pdf/2405.03129">pdf</a>, <a href="https://arxiv.org/format/2405.03129">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Active Sensing for Multiuser Beam Tracking with Reconfigurable Intelligent Surface </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Han%2C+H">Han Han</a>, <a href="/search/eess?searchtype=author&query=Jiang%2C+T">Tao Jiang</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+W">Wei Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.03129v3-abstract-short" style="display: inline;"> This paper studies a beam tracking problem in which an access point (AP), in collaboration with a reconfigurable intelligent surface (RIS), dynamically adjusts its downlink beamformers and the reflection pattern at the RIS in order to maintain reliable communications with multiple mobile user equipments (UEs). Specifically, the mobile UEs send uplink pilots to the AP periodically during the channe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.03129v3-abstract-full').style.display = 'inline'; document.getElementById('2405.03129v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.03129v3-abstract-full" style="display: none;"> This paper studies a beam tracking problem in which an access point (AP), in collaboration with a reconfigurable intelligent surface (RIS), dynamically adjusts its downlink beamformers and the reflection pattern at the RIS in order to maintain reliable communications with multiple mobile user equipments (UEs). Specifically, the mobile UEs send uplink pilots to the AP periodically during the channel sensing intervals, the AP then adaptively configures the beamformers and the RIS reflection coefficients for subsequent data transmission based on the received pilots. This is an active sensing problem, because channel sensing involves configuring the RIS coefficients during the pilot stage and the optimal sensing strategy should exploit the trajectory of channel state information (CSI) from previously received pilots. 
Analytical solution to such an active sensing problem is very challenging. In this paper, we propose a deep learning framework utilizing a recurrent neural network (RNN) to automatically summarize the time-varying CSI obtained from the periodically received pilots into state vectors. These state vectors are then mapped to the AP beamformers and RIS reflection coefficients for subsequent downlink data transmissions, as well as the RIS reflection coefficients for the next round of uplink channel sensing. The mappings from the state vectors to the downlink beamformers and the RIS reflection coefficients for both channel sensing and downlink data transmission are performed using graph neural networks (GNNs) to account for the interference among the UEs. Simulations demonstrate significant and interpretable performance improvement of the proposed approach over the existing data-driven methods with nonadaptive channel sensing schemes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.03129v3-abstract-full').style.display = 'none'; document.getElementById('2405.03129v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.13640">arXiv:2404.13640</a> <span> [<a href="https://arxiv.org/pdf/2404.13640">pdf</a>, <a href="https://arxiv.org/format/2404.13640">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Beyond Alignment: Blind Video Face Restoration via Parsing-Guided Temporal-Coherent Transformer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xu%2C+K">Kepeng Xu</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+L">Li Xu</a>, <a href="/search/eess?searchtype=author&query=He%2C+G">Gang He</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+W">Wenxin Yu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yunsong Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.13640v1-abstract-short" style="display: inline;"> Multiple complex degradations are coupled in low-quality video faces in the real world. Therefore, blind video face restoration is a highly challenging ill-posed problem, requiring not only hallucinating high-fidelity details but also enhancing temporal coherence across diverse pose variations. 
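<p class="is-size-7">To make the active-sensing architecture of arXiv:2405.03129 above more concrete, the following is a minimal PyTorch sketch, not the authors' code: a GRU cell plays the role of the RNN that folds each round of received pilots into per-user state vectors, a mean-aggregation layer stands in for the graph neural network that couples the users, and small heads map the states to the RIS reflection pattern and the downlink beamformers. All module names and dimensions are illustrative assumptions.</p> <pre><code># Minimal sketch (assumed shapes): per-user RNN state, GNN-like mixing, RIS/beamformer heads.
import torch
import torch.nn as nn

class ActiveSensingCell(nn.Module):
    def __init__(self, pilot_dim=64, state_dim=128, num_ris=256, bf_dim=16):
        super().__init__()
        self.rnn = nn.GRUCell(pilot_dim, state_dim)      # summarizes the pilot history per user
        self.agg = nn.Linear(2 * state_dim, state_dim)   # mixes own state with the mean of the others
        self.ris_head = nn.Linear(state_dim, num_ris)    # RIS reflection pattern for the next slot
        self.bf_head = nn.Linear(state_dim, 2 * bf_dim)  # real/imag parts of each user's beamformer

    def forward(self, pilots, states):
        # pilots: (K, pilot_dim) features of the received uplink pilots; states: (K, state_dim)
        states = self.rnn(pilots, states)
        mean_state = states.mean(dim=0, keepdim=True).expand_as(states)
        mixed = torch.relu(self.agg(torch.cat([states, mean_state], dim=-1)))
        theta = torch.exp(1j * torch.pi * torch.tanh(self.ris_head(mixed).mean(dim=0)))  # unit-modulus RIS coefficients
        w = self.bf_head(mixed)
        w = torch.complex(w[:, :w.shape[1] // 2], w[:, w.shape[1] // 2:])                # (K, bf_dim) beamformers
        return theta, w, states

cell = ActiveSensingCell()
states = torch.zeros(3, 128)                          # three mobile users
theta, w, states = cell(torch.randn(3, 64), states)   # one channel-sensing round
</code></pre>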
Restoring each frame independently in a naive manner inevitably introduces temporal incoherence and arti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.13640v1-abstract-full').style.display = 'inline'; document.getElementById('2404.13640v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.13640v1-abstract-full" style="display: none;"> Multiple complex degradations are coupled in low-quality video faces in the real world. Therefore, blind video face restoration is a highly challenging ill-posed problem, requiring not only hallucinating high-fidelity details but also enhancing temporal coherence across diverse pose variations. Restoring each frame independently in a naive manner inevitably introduces temporal incoherence and artifacts from pose changes and keypoint localization errors. To address this, we propose the first blind video face restoration approach with a novel parsing-guided temporal-coherent transformer (PGTFormer) without pre-alignment. PGTFormer leverages semantic parsing guidance to select optimal face priors for generating temporally coherent artifact-free results. Specifically, we pre-train a temporal-spatial vector quantized auto-encoder on high-quality video face datasets to extract expressive context-rich priors. Then, the temporal parse-guided codebook predictor (TPCP) restores faces in different poses based on face parsing context cues without performing face pre-alignment. This strategy reduces artifacts and mitigates jitter caused by cumulative errors from face pre-alignment. Finally, the temporal fidelity regulator (TFR) enhances fidelity through temporal feature interaction and improves video temporal consistency. Extensive experiments on face videos show that our method outperforms previous face restoration baselines. The code will be released on \href{https://github.com/kepengxu/PGTFormer}{https://github.com/kepengxu/PGTFormer}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.13640v1-abstract-full').style.display = 'none'; document.getElementById('2404.13640v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. 
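<p class="is-size-7">The PGTFormer pipeline in arXiv:2404.13640 above builds on a pre-trained vector-quantized auto-encoder whose codebook supplies high-quality face priors. The snippet below is only a generic sketch of that vector-quantization step (nearest codebook entry per feature, with a straight-through gradient), not the paper's temporal parse-guided predictor; shapes and names are assumptions.</p> <pre><code># Generic VQ lookup used by codebook priors (a sketch, not PGTFormer itself).
import torch

def vector_quantize(features, codebook):
    # features: (N, D) encoder outputs; codebook: (K, D) learned prior entries
    d2 = torch.cdist(features, codebook) ** 2    # squared distance to every code
    idx = d2.argmin(dim=1)                       # nearest code per feature vector
    quantized = codebook[idx]                    # entries passed on to the decoder
    # straight-through estimator: gradients bypass the non-differentiable lookup
    return features + (quantized - features).detach(), idx

feats = torch.randn(16, 256)
codes = torch.randn(1024, 256)
quantized, idx = vector_quantize(feats, codes)
</code></pre>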
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.13392">arXiv:2404.13392</a> <span> [<a href="https://arxiv.org/pdf/2404.13392">pdf</a>, <a href="https://arxiv.org/format/2404.13392">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> Beamforming Design for Integrated Sensing and Communications Using Uplink-Downlink Duality </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Attiah%2C+K+M">Kareem M. Attiah</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+W">Wei Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.13392v1-abstract-short" style="display: inline;"> This paper presents a novel optimization framework for beamforming design in integrated sensing and communication systems where a base station seeks to minimize the Bayesian Cram茅r-Rao bound of a sensing problem while satisfying quality of service constraints for the communication users. Prior approaches formulate the design problem as a semidefinite program for which acquiring a beamforming solut… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.13392v1-abstract-full').style.display = 'inline'; document.getElementById('2404.13392v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.13392v1-abstract-full" style="display: none;"> This paper presents a novel optimization framework for beamforming design in integrated sensing and communication systems where a base station seeks to minimize the Bayesian Cram茅r-Rao bound of a sensing problem while satisfying quality of service constraints for the communication users. Prior approaches formulate the design problem as a semidefinite program for which acquiring a beamforming solution is computationally expensive. In this work, we show that the computational burden can be considerably alleviated. To achieve this, we transform the design problem to a tractable form that not only provides a new understanding of Cram茅r-Rao bound optimization, but also allows for an uplink-downlink duality relation to be developed. Such a duality result gives rise to an efficient algorithm that enables the beamforming design problem to be solved at a much lower complexity as compared to the-state-of-the-art methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.13392v1-abstract-full').style.display = 'none'; document.getElementById('2404.13392v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 2 figures, accepted at ISIT2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.09004">arXiv:2403.09004</a> <span> [<a href="https://arxiv.org/pdf/2403.09004">pdf</a>, <a href="https://arxiv.org/ps/2403.09004">ps</a>, <a href="https://arxiv.org/format/2403.09004">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Meta-Learning-Based Fronthaul Compression for Cloud Radio Access Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Qiao%2C+R">Ruihua Qiao</a>, <a href="/search/eess?searchtype=author&query=Jiang%2C+T">Tao Jiang</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+W">Wei Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.09004v1-abstract-short" style="display: inline;"> This paper investigates the fronthaul compression problem in a user-centric cloud radio access network, in which single-antenna users are served by a central processor (CP) cooperatively via a cluster of remote radio heads (RRHs). To satisfy the fronthaul capacity constraint, this paper proposes a transform-compress-forward scheme, which consists of well-designed transformation matrices and unifor… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.09004v1-abstract-full').style.display = 'inline'; document.getElementById('2403.09004v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.09004v1-abstract-full" style="display: none;"> This paper investigates the fronthaul compression problem in a user-centric cloud radio access network, in which single-antenna users are served by a central processor (CP) cooperatively via a cluster of remote radio heads (RRHs). To satisfy the fronthaul capacity constraint, this paper proposes a transform-compress-forward scheme, which consists of well-designed transformation matrices and uniform quantizers. The transformation matrices perform dimension reduction in the uplink and dimension expansion in the downlink. To reduce the communication overhead for designing the transformation matrices, this paper further proposes a deep learning framework to first learn a suboptimal transformation matrix at each RRH based on the local channel state information (CSI), and then to refine it iteratively. To facilitate the refinement process, we propose an efficient signaling scheme that only requires the transmission of low-dimensional effective CSI and its gradient between the CP and RRH, and further, a meta-learning based gated recurrent unit network to reduce the number of signaling transmission rounds. For the sum-rate maximization problem, simulation results show that the proposed two-stage neural network can perform close to the fully cooperative global CSI based benchmark with significantly reduced communication overhead for both the uplink and the downlink. 
Moreover, using the first stage alone can already outperform the existing local CSI based benchmark. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.09004v1-abstract-full').style.display = 'none'; document.getElementById('2403.09004v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 Pages, 13 Figures; accepted in IEEE Transactions on Wireless Communications</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.00134">arXiv:2403.00134</a> <span> [<a href="https://arxiv.org/pdf/2403.00134">pdf</a>, <a href="https://arxiv.org/format/2403.00134">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Active Sensing for Reciprocal MIMO Channels </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Jiang%2C+T">Tao Jiang</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+W">Wei Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.00134v2-abstract-short" style="display: inline;"> This paper addresses the design of transmit precoder and receive combiner matrices to support $N_{\rm s}$ independent data streams over a time-division duplex (TDD) point-to-point massive multiple-input multiple-output (MIMO) channel with either a fully digital or a hybrid structure. The optimal precoder and combiner design amounts to finding the top-$N_{\rm s}$ singular vectors of the channel mat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.00134v2-abstract-full').style.display = 'inline'; document.getElementById('2403.00134v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.00134v2-abstract-full" style="display: none;"> This paper addresses the design of transmit precoder and receive combiner matrices to support $N_{\rm s}$ independent data streams over a time-division duplex (TDD) point-to-point massive multiple-input multiple-output (MIMO) channel with either a fully digital or a hybrid structure. The optimal precoder and combiner design amounts to finding the top-$N_{\rm s}$ singular vectors of the channel matrix, but the explicit estimation of the entire high-dimensional channel would require significant pilot overhead. Alternatively, prior works suggest to find the precoding and combining matrices directly by exploiting channel reciprocity and by using the power iteration method, but its performance degrades in the low SNR regime. 
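<p class="is-size-7">The power-iteration baseline mentioned in the abstract of arXiv:2403.00134 above can be written down compactly: by sending pilots back and forth over a reciprocal TDD channel, the precoder and combiner converge to the top singular subspaces of the channel matrix without ever estimating it explicitly. The NumPy sketch below shows the noiseless version of this baseline (not the proposed learning-based method); dimensions are illustrative.</p> <pre><code># Power-iteration (subspace iteration) baseline over a reciprocal channel H, noiseless for clarity.
import numpy as np

def power_iteration_precoders(H, num_streams, num_rounds=20, seed=0):
    rng = np.random.default_rng(seed)
    n_rx, n_tx = H.shape
    V = rng.standard_normal((n_tx, num_streams)) + 1j * rng.standard_normal((n_tx, num_streams))
    V, _ = np.linalg.qr(V)                   # orthonormal transmit precoder
    U = None
    for _ in range(num_rounds):
        U, _ = np.linalg.qr(H @ V)           # forward pilots -&gt; receive combiner
        V, _ = np.linalg.qr(H.conj().T @ U)  # reverse pilots (reciprocity) -&gt; precoder
    return V, U

rng = np.random.default_rng(1)
H = rng.standard_normal((64, 32)) + 1j * rng.standard_normal((64, 32))
V, U = power_iteration_precoders(H, num_streams=4)
_, _, Vh = np.linalg.svd(H)
print(np.linalg.norm(Vh[:4] @ V))            # close to 2 = sqrt(4) when the subspaces align
</code></pre>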
To tackle this challenging problem, this paper proposes a learning-based active sensing framework, where the transmitter and the receiver send pilots alternately using sensing beamformers that are actively designed as functions of previously received pilots. This is accomplished by using recurrent neural networks to summarize information from the historical observations into hidden state vectors, then using fully connected neural networks to learn the appropriate sensing beamformers in the next pilot stage and finally the transmit precoding and receive combiner matrices for data communications. Simulations demonstrate that the learning-based method outperforms existing approaches significantly and maintains superior performance even in the low SNR regime for both the fully digital and hybrid MIMO scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.00134v2-abstract-full').style.display = 'none'; document.getElementById('2403.00134v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper is accepted in IEEE Transactions on Signal Processing</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.11743">arXiv:2402.11743</a> <span> [<a href="https://arxiv.org/pdf/2402.11743">pdf</a>, <a href="https://arxiv.org/format/2402.11743">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TWC.2023.3335362">10.1109/TWC.2023.3335362 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Hybrid Online-Offline Learning for Task Offloading in Mobile Edge Computing Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Sohaib%2C+M">Muhammad Sohaib</a>, <a href="/search/eess?searchtype=author&query=Jeon%2C+S">Sang-Woon Jeon</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+W">Wei Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.11743v2-abstract-short" style="display: inline;"> We consider a multi-user multi-server mobile edge computing (MEC) system, in which users arrive on a network randomly over time and generate computation tasks, which will be computed either locally on their own computing devices or be offloaded to one of the MEC servers. 
Under such a dynamic network environment, we propose a novel task offloading policy based on hybrid online-offline learning, whi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.11743v2-abstract-full').style.display = 'inline'; document.getElementById('2402.11743v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.11743v2-abstract-full" style="display: none;"> We consider a multi-user multi-server mobile edge computing (MEC) system, in which users arrive on a network randomly over time and generate computation tasks, which will be computed either locally on their own computing devices or be offloaded to one of the MEC servers. Under such a dynamic network environment, we propose a novel task offloading policy based on hybrid online-offline learning, which can efficiently reduce the overall computation delay and energy consumption only with information available at nearest MEC servers from each user. We provide a practical signaling and learning framework that can train deep neural networks for both online and offline learning and can adjust its offloading policy based on the queuing status of each MEC server and network dynamics. Numerical results demonstrate that the proposed scheme significantly reduces the average computation delay for a broad class of network environments compared to the conventional offloading methods. It is further shown that the proposed hybrid online-offline learning framework can be extended to a general cost function reflecting both delay and energy-dependent metrics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.11743v2-abstract-full').style.display = 'none'; document.getElementById('2402.11743v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. 
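<p class="is-size-7">As a rough illustration of the offloading decision that the hybrid online-offline policy of arXiv:2402.11743 above learns to make, the toy NumPy sketch below compares an estimate of the local computation delay with upload-plus-queueing-plus-execution delays at nearby MEC servers and picks the smallest. All quantities and numbers are illustrative assumptions, not parameters from the paper.</p> <pre><code># Toy delay comparison for task offloading; names and values are illustrative only.
import numpy as np

def offload_decision(task_bits, cycles_per_bit, f_local, uplink_rate, f_server, queue_cycles):
    cycles = task_bits * cycles_per_bit
    local_delay = cycles / f_local
    # per candidate server: transmission + waiting behind queued work + remote execution
    remote_delay = task_bits / uplink_rate + (queue_cycles + cycles) / f_server
    best = int(np.argmin(remote_delay))
    if local_delay > remote_delay[best]:
        return best, remote_delay[best]   # offload to the best server
    return -1, local_delay                # -1 means compute locally

choice, delay = offload_decision(
    task_bits=2e6, cycles_per_bit=500, f_local=1e9,
    uplink_rate=np.array([20e6, 50e6]),   # bit/s towards two nearby servers
    f_server=np.array([8e9, 8e9]),        # server CPU frequencies in cycles/s
    queue_cycles=np.array([4e9, 1e10]))   # work already queued at each server
print(choice, delay)
</code></pre>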
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted by IEEE Transactions on Wireless Communications</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Wireless Communications (2023) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.12025">arXiv:2401.12025</a> <span> [<a href="https://arxiv.org/pdf/2401.12025">pdf</a>, <a href="https://arxiv.org/format/2401.12025">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> A Survey of Recent Advances in Optimization Methods for Wireless Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Liu%2C+Y">Ya-Feng Liu</a>, <a href="/search/eess?searchtype=author&query=Chang%2C+T">Tsung-Hui Chang</a>, <a href="/search/eess?searchtype=author&query=Hong%2C+M">Mingyi Hong</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Z">Zheyu Wu</a>, <a href="/search/eess?searchtype=author&query=So%2C+A+M">Anthony Man-Cho So</a>, <a href="/search/eess?searchtype=author&query=Jorswieck%2C+E+A">Eduard A. Jorswieck</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+W">Wei Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.12025v3-abstract-short" style="display: inline;"> Mathematical optimization is now widely regarded as an indispensable modeling and solution tool for the design of wireless communications systems. While optimization has played a significant role in the revolutionary progress in wireless communication and networking technologies from 1G to 5G and onto the future 6G, the innovations in wireless technologies have also substantially transformed the n… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.12025v3-abstract-full').style.display = 'inline'; document.getElementById('2401.12025v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.12025v3-abstract-full" style="display: none;"> Mathematical optimization is now widely regarded as an indispensable modeling and solution tool for the design of wireless communications systems. While optimization has played a significant role in the revolutionary progress in wireless communication and networking technologies from 1G to 5G and onto the future 6G, the innovations in wireless technologies have also substantially transformed the nature of the underlying mathematical optimization problems upon which the system designs are based and have sparked significant innovations in the development of methodologies to understand, to analyze, and to solve those problems. In this paper, we provide a comprehensive survey of recent advances in mathematical optimization theory and algorithms for wireless communication system design. 
We begin by illustrating common features of mathematical optimization problems arising in wireless communication system design. We discuss various scenarios and use cases and their associated mathematical structures from an optimization perspective. We then provide an overview of recently developed optimization techniques in areas ranging from nonconvex optimization, global optimization, and integer programming, to distributed optimization and learning-based optimization. The key to successful solution of mathematical optimization problems is in carefully choosing or developing suitable algorithms (or neural network architectures) that can exploit the underlying problem structure. We conclude the paper by identifying several open research challenges and outlining future research directions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.12025v3-abstract-full').style.display = 'none'; document.getElementById('2401.12025v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">39 pages, 5 figures, accepted for publication in IEEE Journal on Selected Areas in Communications</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.10438">arXiv:2312.10438</a> <span> [<a href="https://arxiv.org/pdf/2312.10438">pdf</a>, <a href="https://arxiv.org/format/2312.10438">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/JSTSP.2024.3414137">10.1109/JSTSP.2024.3414137 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Bayes-Optimal Unsupervised Learning for Channel Estimation in Near-Field Holographic MIMO </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Yu%2C+W">Wentao Yu</a>, <a href="/search/eess?searchtype=author&query=He%2C+H">Hengtao He</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+X">Xianghao Yu</a>, <a href="/search/eess?searchtype=author&query=Song%2C+S">Shenghui Song</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+J">Jun Zhang</a>, <a href="/search/eess?searchtype=author&query=Murch%2C+R">Ross Murch</a>, <a href="/search/eess?searchtype=author&query=Letaief%2C+K+B">Khaled B. 
Letaief</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.10438v2-abstract-short" style="display: inline;"> Holographic MIMO (HMIMO) is being increasingly recognized as a key enabling technology for 6G wireless systems through the deployment of an extremely large number of antennas within a compact space to fully exploit the potentials of the electromagnetic (EM) channel. Nevertheless, the benefits of HMIMO systems cannot be fully unleashed without an efficient means to estimate the high-dimensional cha… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.10438v2-abstract-full').style.display = 'inline'; document.getElementById('2312.10438v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.10438v2-abstract-full" style="display: none;"> Holographic MIMO (HMIMO) is being increasingly recognized as a key enabling technology for 6G wireless systems through the deployment of an extremely large number of antennas within a compact space to fully exploit the potentials of the electromagnetic (EM) channel. Nevertheless, the benefits of HMIMO systems cannot be fully unleashed without an efficient means to estimate the high-dimensional channel, whose distribution becomes increasingly complicated due to the accessibility of the near-field region. In this paper, we address the fundamental challenge of designing a low-complexity Bayes-optimal channel estimator in near-field HMIMO systems operating in unknown EM environments. The core idea is to estimate the HMIMO channels solely based on the Stein's score function of the received pilot signals and an estimated noise level, without relying on priors or supervision that is not feasible in practical deployment. A neural network is trained with the unsupervised denoising score matching objective to learn the parameterized score function. Meanwhile, a principal component analysis (PCA)-based algorithm is proposed to estimate the noise level leveraging the low-rank near-field spatial correlation. Building upon these techniques, we develop a Bayes-optimal score-based channel estimator for fully-digital HMIMO transceivers in a closed form. The optimal score-based estimator is also extended to hybrid analog-digital HMIMO systems by incorporating it into a low-complexity message passing algorithm. The (quasi-) Bayes-optimality of the proposed estimators is validated both in theory and by extensive simulation results. In addition to optimality, it is shown that our proposal is robust to various mismatches and can quickly adapt to dynamic EM environments in an online manner thanks to its unsupervised nature, demonstrating its potential in real-world deployment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.10438v2-abstract-full').style.display = 'none'; document.getElementById('2312.10438v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. 
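<p class="is-size-7">Two ingredients named in the abstract of arXiv:2312.10438 above can be illustrated compactly: a PCA-style noise-level estimate obtained from the trailing eigenvalues of the sample covariance of the received pilots, and the score-based MMSE update (Tweedie's formula), in which the estimate is the observation plus the noise variance times the score of the observation. The NumPy sketch below only illustrates these generic ideas; the placeholder score_fn stands in for the trained score network, and the Gaussian self-check is not from the paper.</p> <pre><code># (i) noise level from trailing sample-covariance eigenvalues, (ii) Tweedie/MMSE update.
import numpy as np

def estimate_noise_level(Y, signal_rank):
    # Y: (N, T) received pilots; the noise variance is read off the smallest eigenvalues
    C = Y @ Y.conj().T / Y.shape[1]
    eigvals = np.linalg.eigvalsh(C)                       # ascending order
    return float(np.real(eigvals[: Y.shape[0] - signal_rank].mean()))

def mmse_estimate(y, score_fn, sigma2):
    # Tweedie's formula: E[x | y] = y + sigma2 * d/dy log p(y)
    return y + sigma2 * score_fn(y)

rng = np.random.default_rng(0)
# Self-check with a Gaussian prior x ~ N(0, 1): the true score of y is -y / (1 + sigma2)
sigma2 = 0.5
x = rng.standard_normal(10000)
y = x + np.sqrt(sigma2) * rng.standard_normal(10000)
x_hat = mmse_estimate(y, lambda v: -v / (1.0 + sigma2), sigma2)
print(np.mean((x_hat - x) ** 2), sigma2 / (1 + sigma2))   # both are close to 1/3

# Noise-level estimate on a rank-2 signal buried in noise of variance 0.09
A = rng.standard_normal((8, 2)) @ rng.standard_normal((2, 200)) + 0.3 * rng.standard_normal((8, 200))
print(estimate_noise_level(A, signal_rank=2))             # roughly 0.09
</code></pre>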
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 7 figures, 3 tables, accepted by IEEE Journal of Selected Topics in Signal Processing</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.09002">arXiv:2312.09002</a> <span> [<a href="https://arxiv.org/pdf/2312.09002">pdf</a>, <a href="https://arxiv.org/format/2312.09002">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Localization with Reconfigurable Intelligent Surface: An Active Sensing Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhang%2C+Z">Zhongze Zhang</a>, <a href="/search/eess?searchtype=author&query=Jiang%2C+T">Tao Jiang</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+W">Wei Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.09002v2-abstract-short" style="display: inline;"> This paper addresses an uplink localization problem in which a base station (BS) aims to locate a remote user with the help of reconfigurable intelligent surfaces (RISs). We propose a strategy in which the user transmits pilots sequentially and the BS adaptively adjusts the sensing vectors, including the BS beamforming vector and multiple RIS reflection coefficients based on the observations alrea… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.09002v2-abstract-full').style.display = 'inline'; document.getElementById('2312.09002v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.09002v2-abstract-full" style="display: none;"> This paper addresses an uplink localization problem in which a base station (BS) aims to locate a remote user with the help of reconfigurable intelligent surfaces (RISs). We propose a strategy in which the user transmits pilots sequentially and the BS adaptively adjusts the sensing vectors, including the BS beamforming vector and multiple RIS reflection coefficients based on the observations already made, to eventually produce an estimated user position. This is a challenging active sensing problem for which finding an optimal solution involves searching through a complicated functional space whose dimension increases with the number of measurements. We show that the long short-term memory (LSTM) network can be used to exploit the latent temporal correlation between measurements to automatically construct scalable state vectors. Subsequently, the state vector is mapped to the sensing vectors for the next time frame via a deep neural network (DNN). A final DNN is used to map the state vector to the estimated user position. Numerical result illustrates the advantage of the active sensing design as compared to non-active sensing methods. The proposed solution produces interpretable results and is generalizable in the number of sensing stages. 
Remarkably, we show that a network with one BS and multiple RISs can outperform a comparable setting with multiple BSs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.09002v2-abstract-full').style.display = 'none'; document.getElementById('2312.09002v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in IEEE Transactions on Wireless Communications. This is an extended version of the previous arXiv paper arXiv:2310.13160</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.15299">arXiv:2311.15299</a> <span> [<a href="https://arxiv.org/pdf/2311.15299">pdf</a>, <a href="https://arxiv.org/ps/2311.15299">ps</a>, <a href="https://arxiv.org/format/2311.15299">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TIT.2024.3470952">10.1109/TIT.2024.3470952 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Covariance-Based Activity Detection in Cooperative Multi-Cell Massive MIMO: Scaling Law and Efficient Algorithms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+Z">Ziyue Wang</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Y">Ya-Feng Liu</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Z">Zhaorui Wang</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+W">Wei Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.15299v3-abstract-short" style="display: inline;"> This paper focuses on the covariance-based activity detection problem in a multi-cell massive multiple-input multiple-output (MIMO) system. In this system, active devices transmit their signature sequences to multiple base stations (BSs), and the BSs cooperatively detect the active devices based on the received signals. 
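<p class="is-size-7">A minimal PyTorch sketch of the active-sensing pipeline described in arXiv:2312.09002 above, with placeholder dimensions: an LSTM cell folds each new measurement into a hidden state, one network maps the state to the sensing vectors (here just the RIS reflection phases) for the next time frame, and a final network maps the cell state to the estimated user position. This illustrates the described architecture and is not the authors' implementation.</p> <pre><code># LSTM-based active sensing for localization (placeholder sizes, illustrative only).
import torch
import torch.nn as nn

class RISLocalizer(nn.Module):
    def __init__(self, meas_dim=32, hidden_dim=256, num_ris=128):
        super().__init__()
        self.lstm = nn.LSTMCell(meas_dim, hidden_dim)
        self.next_ris = nn.Sequential(nn.Linear(hidden_dim, 256), nn.ReLU(), nn.Linear(256, num_ris))
        self.to_position = nn.Sequential(nn.Linear(hidden_dim, 256), nn.ReLU(), nn.Linear(256, 3))

    def forward(self, measurements):
        # measurements: (T, batch, meas_dim), one entry per sensing stage
        h = torch.zeros(measurements.shape[1], self.lstm.hidden_size)
        c = torch.zeros_like(h)
        ris_phases = []
        for y_t in measurements:
            h, c = self.lstm(y_t, (h, c))
            ris_phases.append(torch.pi * torch.tanh(self.next_ris(h)))  # phases for the next frame
        return self.to_position(c), ris_phases       # position estimate from the final cell state

model = RISLocalizer()
position, phases = model(torch.randn(10, 4, 32))     # 10 sensing stages, batch of 4 users
</code></pre>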
While the scaling law for the covariance-based activity detection in the singl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.15299v3-abstract-full').style.display = 'inline'; document.getElementById('2311.15299v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.15299v3-abstract-full" style="display: none;"> This paper focuses on the covariance-based activity detection problem in a multi-cell massive multiple-input multiple-output (MIMO) system. In this system, active devices transmit their signature sequences to multiple base stations (BSs), and the BSs cooperatively detect the active devices based on the received signals. While the scaling law for the covariance-based activity detection in the single-cell scenario has been extensively analyzed in the literature, this paper aims to analyze the scaling law for the covariance-based activity detection in the multi-cell massive MIMO system. Specifically, this paper demonstrates a quadratic scaling law in the multi-cell system, under the assumption that the path-loss exponent of the fading channel $\gamma > 2.$ This finding shows that, in the multi-cell massive MIMO system, the maximum number of active devices that can be correctly detected in each cell increases quadratically with the length of the signature sequence and decreases logarithmically with the number of cells (as the number of antennas tends to infinity). Moreover, in addition to analyzing the scaling law for the signature sequences randomly and uniformly distributed on a sphere, the paper also establishes the scaling law for signature sequences based on a finite alphabet, which are easier to generate and store. Finally, this paper proposes two efficient accelerated coordinate descent (CD) algorithms with a convergence guarantee for solving the device activity detection problem. The first algorithm reduces the complexity of CD by using an inexact coordinate update strategy. The second algorithm avoids unnecessary computations of CD by using an active set selection strategy. Simulation results show that the proposed algorithms exhibit excellent performance in terms of computational efficiency and detection error probability. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.15299v3-abstract-full').style.display = 'none'; document.getElementById('2311.15299v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023.
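<p class="is-size-7">For reference, the plain coordinate descent that the two accelerated algorithms in arXiv:2311.15299 above build on admits a closed-form single-coordinate update together with a rank-one (Sherman-Morrison) update of the covariance inverse. The NumPy sketch below implements that standard baseline for the single-cell covariance-based maximum-likelihood formulation; it is an illustration under simplified assumptions (unit large-scale fading, known noise power), not the paper's inexact-update or active-set algorithms.</p> <pre><code># Plain coordinate descent for covariance-based activity detection (single cell, simplified).
# Minimize log det(Sigma) + tr(Sigma^{-1} C_hat) over gamma, with Sigma = sigma2*I + S diag(gamma) S^H.
import numpy as np

def coordinate_descent(S, Y, sigma2, num_sweeps=30):
    L, N = S.shape
    C_hat = Y @ Y.conj().T / Y.shape[1]          # sample covariance across antennas
    gamma = np.zeros(N)
    Sigma_inv = np.eye(L) / sigma2
    for _ in range(num_sweeps):
        for n in range(N):
            s = S[:, n:n + 1]
            a = np.real(s.conj().T @ Sigma_inv @ s).item()                  # s^H Sigma^{-1} s
            b = np.real(s.conj().T @ Sigma_inv @ C_hat @ Sigma_inv @ s).item()
            d = max((b - a) / (a ** 2), -gamma[n])                          # closed-form step, gamma stays nonnegative
            gamma[n] += d
            u = Sigma_inv @ s                                               # Sherman-Morrison update of Sigma^{-1}
            Sigma_inv -= d * (u @ u.conj().T) / (1.0 + d * a)
    return gamma

rng = np.random.default_rng(0)
L, N, M, K = 20, 100, 64, 5                      # sequence length, devices, antennas, active devices
S = (rng.standard_normal((L, N)) + 1j * rng.standard_normal((L, N))) / np.sqrt(2 * L)
active = np.sort(rng.choice(N, K, replace=False))
X = np.zeros((N, M), dtype=complex)
X[active] = (rng.standard_normal((K, M)) + 1j * rng.standard_normal((K, M))) / np.sqrt(2)
Y = S @ X + 0.1 * (rng.standard_normal((L, M)) + 1j * rng.standard_normal((L, M)))
gamma = coordinate_descent(S, Y, sigma2=0.02)
print(np.sort(np.argsort(gamma)[-K:]), active)    # the largest entries should match the active set
</code></pre>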
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">25 pages, 12 figures, accepted for publication in IEEE Transactions on Information Theory</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.13626">arXiv:2311.13626</a> <span> [<a href="https://arxiv.org/pdf/2311.13626">pdf</a>, <a href="https://arxiv.org/format/2311.13626">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optics">physics.optics</span> </div> </div> <p class="title is-5 mathjax"> Physics-driven generative adversarial networks empower single-pixel infrared hyperspectral imaging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+D">Dong-Yin Wang</a>, <a href="/search/eess?searchtype=author&query=Bie%2C+S">Shu-Hang Bie</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+X">Xi-Hao Chen</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+W">Wen-Kai Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.13626v1-abstract-short" style="display: inline;"> A physics-driven generative adversarial network (GAN) was established here for single-pixel hyperspectral imaging (HSI) in the infrared spectrum, to eliminate the extensive data training work required by traditional data-driven model. Within the GAN framework, the physical process of single-pixel imaging (SPI) was integrated into the generator, and the actual and estimated one-dimensional (1D) buc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.13626v1-abstract-full').style.display = 'inline'; document.getElementById('2311.13626v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.13626v1-abstract-full" style="display: none;"> A physics-driven generative adversarial network (GAN) was established here for single-pixel hyperspectral imaging (HSI) in the infrared spectrum, to eliminate the extensive data training work required by traditional data-driven model. Within the GAN framework, the physical process of single-pixel imaging (SPI) was integrated into the generator, and the actual and estimated one-dimensional (1D) bucket signals were employed as constraints in the objective function to update the network's parameters and optimize the generator with the assistance of the discriminator. In comparison to single-pixel infrared HSI methods based on compressed sensing and physics-driven convolution neural networks, our physics-driven GAN-based single-pixel infrared HSI can achieve higher imaging performance but with fewer measurements. We believe that this physics-driven GAN will promote practical applications of computational imaging, especially various SPI-based techniques. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.13626v1-abstract-full').style.display = 'none'; document.getElementById('2311.13626v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.07908">arXiv:2311.07908</a> <span> [<a href="https://arxiv.org/pdf/2311.07908">pdf</a>, <a href="https://arxiv.org/format/2311.07908">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Learning Bayes-Optimal Channel Estimation for Holographic MIMO in Unknown EM Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Yu%2C+W">Wentao Yu</a>, <a href="/search/eess?searchtype=author&query=He%2C+H">Hengtao He</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+X">Xianghao Yu</a>, <a href="/search/eess?searchtype=author&query=Song%2C+S">Shenghui Song</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+J">Jun Zhang</a>, <a href="/search/eess?searchtype=author&query=Murch%2C+R+D">Ross D. Murch</a>, <a href="/search/eess?searchtype=author&query=Letaief%2C+K+B">Khaled B. Letaief</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.07908v2-abstract-short" style="display: inline;"> Holographic MIMO (HMIMO) has recently been recognized as a promising enabler for future 6G systems through the use of an ultra-massive number of antennas in a compact space to exploit the propagation characteristics of the electromagnetic (EM) channel. Nevertheless, the promised gain of HMIMO could not be fully unleashed without an efficient means to estimate the high-dimensional channel. Bayes-op… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.07908v2-abstract-full').style.display = 'inline'; document.getElementById('2311.07908v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.07908v2-abstract-full" style="display: none;"> Holographic MIMO (HMIMO) has recently been recognized as a promising enabler for future 6G systems through the use of an ultra-massive number of antennas in a compact space to exploit the propagation characteristics of the electromagnetic (EM) channel. Nevertheless, the promised gain of HMIMO could not be fully unleashed without an efficient means to estimate the high-dimensional channel. Bayes-optimal estimators typically necessitate either a large volume of supervised training samples or a priori knowledge of the true channel distribution, which could hardly be available in practice due to the enormous system scale and the complicated EM environments. 
It is thus important to design a Bayes-optimal estimator for the HMIMO channels in arbitrary and unknown EM environments, free of any supervision or priors. This work proposes a self-supervised minimum mean-square-error (MMSE) channel estimation algorithm based on powerful machine learning tools, i.e., score matching and principal component analysis. The training stage requires only the pilot signals, without knowing the spatial correlation, the ground-truth channels, or the received signal-to-noise-ratio. Simulation results will show that, even being totally self-supervised, the proposed algorithm can still approach the performance of the oracle MMSE method with an extremely low complexity, making it a competitive candidate in practice. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.07908v2-abstract-full').style.display = 'none'; document.getElementById('2311.07908v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 3 figures, 1 table, accepted for presentation at IEEE ICC 2024, Denver, CO, USA</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.04546">arXiv:2311.04546</a> <span> [<a href="https://arxiv.org/pdf/2311.04546">pdf</a>, <a href="https://arxiv.org/ps/2311.04546">ps</a>, <a href="https://arxiv.org/format/2311.04546">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Discerning and Enhancing the Weighted Sum-Rate Maximization Algorithms in Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhang%2C+Z">Zepeng Zhang</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+Z">Ziping Zhao</a>, <a href="/search/eess?searchtype=author&query=Shen%2C+K">Kaiming Shen</a>, <a href="/search/eess?searchtype=author&query=Palomar%2C+D+P">Daniel P. Palomar</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+W">Wei Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.04546v1-abstract-short" style="display: inline;"> Weighted sum-rate (WSR) maximization plays a critical role in communication system design. 
This paper examines three optimization methods for WSR maximization, which ensure convergence to stationary points: two block coordinate ascent (BCA) algorithms, namely, weighted sum-minimum mean-square error (WMMSE) and WSR maximization via fractional programming (WSR-FP), along with a minorization-maximiza… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.04546v1-abstract-full').style.display = 'inline'; document.getElementById('2311.04546v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.04546v1-abstract-full" style="display: none;"> Weighted sum-rate (WSR) maximization plays a critical role in communication system design. This paper examines three optimization methods for WSR maximization, which ensure convergence to stationary points: two block coordinate ascent (BCA) algorithms, namely, weighted sum-minimum mean-square error (WMMSE) and WSR maximization via fractional programming (WSR-FP), along with a minorization-maximization (MM) algorithm, WSR maximization via MM (WSR-MM). Our contributions are threefold. Firstly, we delineate the exact relationships among WMMSE, WSR-FP, and WSR-MM, which, despite their extensive use in the literature, lack a comprehensive comparative study. By probing the theoretical underpinnings linking the BCA and MM algorithmic frameworks, we reveal the direct correlations between the equivalent transformation techniques, essential to the development of WMMSE and WSR-FP, and the surrogate functions pivotal to WSR-MM. Secondly, we propose a novel algorithm, WSR-MM+, harnessing the flexibility of selecting surrogate functions in MM framework. By circumventing the repeated matrix inversions in the search for optimal Lagrange multipliers in existing algorithms, WSR-MM+ significantly reduces the computational load per iteration and accelerates convergence. Thirdly, we reconceptualize WSR-MM+ within the BCA framework, introducing a new equivalent transform, which gives rise to an enhanced version of WSR-FP, named as WSR-FP+. We further demonstrate that WSR-MM+ can be construed as the basic gradient projection method. This perspective yields a deeper understanding into its computational intricacies. Numerical simulations corroborate the connections between WMMSE, WSR-FP, and WSR-MM and confirm the efficacy of the proposed WSR-MM+ and WSR-FP+ algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.04546v1-abstract-full').style.display = 'none'; document.getElementById('2311.04546v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. 
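<p class="is-size-7">For readers unfamiliar with the WMMSE algorithm referenced in arXiv:2311.04546 above, the sketch below spells out the textbook block-coordinate updates on a toy single-antenna interference channel: alternately update the MMSE receive coefficients, the weights (the reciprocals of the mean-square errors), and the transmit amplitudes, each in closed form, with the scalar power constraint enforced by clipping. This is the classical baseline only, not the paper's WSR-MM+ or WSR-FP+ algorithms, and the scenario is an assumption for illustration.</p> <pre><code># Textbook WMMSE on a toy K-user single-antenna interference channel (illustrative only).
import numpy as np

def wmmse_siso(H, P, sigma2, alpha, iters=200):
    # H[k, j]: channel from transmitter j to receiver k; alpha: rate weights; P: per-user power
    K = H.shape[0]
    v = np.sqrt(P) * np.ones(K, dtype=complex)                  # transmit amplitudes
    for _ in range(iters):
        T = (np.abs(H * v) ** 2).sum(axis=1) + sigma2           # total received power per user
        u = H.diagonal() * v / T                                # MMSE receive coefficients
        w = 1.0 / (1.0 - np.abs(H.diagonal() * v) ** 2 / T)     # weights = 1 / MSE
        denom = (np.abs(H) ** 2).T @ (alpha * w * np.abs(u) ** 2)
        v = alpha * w * u * np.conj(H.diagonal()) / denom       # unconstrained minimizer
        v = np.where(np.abs(v) > np.sqrt(P), np.sqrt(P) * v / np.abs(v), v)  # power constraint
    T = (np.abs(H * v) ** 2).sum(axis=1) + sigma2
    sig = np.abs(H.diagonal() * v) ** 2
    return v, float(alpha @ np.log2(T / (T - sig)))             # weighted sum rate

rng = np.random.default_rng(0)
K = 4
H = (rng.standard_normal((K, K)) + 1j * rng.standard_normal((K, K))) / np.sqrt(2)
v, wsr = wmmse_siso(H, P=1.0, sigma2=0.1, alpha=np.ones(K))
print(wsr)
</code></pre>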
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.13289">arXiv:2310.13289</a> <span> [<a href="https://arxiv.org/pdf/2310.13289">pdf</a>, <a href="https://arxiv.org/format/2310.13289">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> SALMONN: Towards Generic Hearing Abilities for Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Tang%2C+C">Changli Tang</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+W">Wenyi Yu</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+G">Guangzhi Sun</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+X">Xianzhao Chen</a>, <a href="/search/eess?searchtype=author&query=Tan%2C+T">Tian Tan</a>, <a href="/search/eess?searchtype=author&query=Li%2C+W">Wei Li</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+L">Lu Lu</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+Z">Zejun Ma</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+C">Chao Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.13289v2-abstract-short" style="display: inline;"> Hearing is arguably an essential ability of artificial intelligence (AI) agents in the physical world, which refers to the perception and understanding of general auditory information consisting of at least three types of sounds: speech, audio events, and music. In this paper, we propose SALMONN, a speech audio language music open neural network, built by integrating a pre-trained text-based large… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.13289v2-abstract-full').style.display = 'inline'; document.getElementById('2310.13289v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.13289v2-abstract-full" style="display: none;"> Hearing is arguably an essential ability of artificial intelligence (AI) agents in the physical world, which refers to the perception and understanding of general auditory information consisting of at least three types of sounds: speech, audio events, and music. In this paper, we propose SALMONN, a speech audio language music open neural network, built by integrating a pre-trained text-based large language model (LLM) with speech and audio encoders into a single multimodal model. SALMONN enables the LLM to directly process and understand general audio inputs and achieve competitive performances on a number of speech and audio tasks used in training, such as automatic speech recognition and translation, auditory-information-based question answering, emotion recognition, speaker verification, and music and audio captioning etc. 
SALMONN also has a diverse set of emergent abilities unseen in the training, which include, but are not limited to, speech translation to untrained languages, speech-based slot filling, spoken-query-based question answering, audio-based storytelling, and speech audio co-reasoning. The presence of cross-modal emergent abilities is studied, and a novel few-shot activation tuning approach is proposed to activate such abilities. To our knowledge, SALMONN is the first model of its type and can be regarded as a step towards AI with generic hearing abilities. The source code, model checkpoints and data are available at https://github.com/bytedance/SALMONN.
Submitted 8 April, 2024; v1 submitted 20 October, 2023; originally announced October 2023.

arXiv:2310.13160 [eess.SP, cs.IT]
Active Sensing for Localization with Reconfigurable Intelligent Surface
Authors: Zhongze Zhang, Tao Jiang, Wei Yu
Abstract: This paper addresses an uplink localization problem in which the base station (BS) aims to locate a remote user with the aid of a reconfigurable intelligent surface (RIS).
This paper proposes a strategy in which the user transmits pilots over multiple time frames, and the BS adaptively adjusts the RIS reflection coefficients based on the observations received so far in order to produce an accurate estimate of the user location at the end. This is a challenging active sensing problem, for which finding an optimal solution involves a search through a complicated functional space whose dimension increases with the number of measurements. In this paper, we show that a long short-term memory (LSTM) network can be used to exploit the latent temporal correlation between measurements to automatically construct scalable information vectors (called hidden states) from the measurements. Subsequently, the state vector can be mapped to the RIS configuration for the next time frame in a codebook-free fashion via a deep neural network (DNN). After all the measurements have been received, a final DNN can be used to map the LSTM cell state to the estimated user equipment (UE) position. Numerical results show that the proposed active RIS design achieves lower localization error than existing active and nonactive methods. The proposed solution produces interpretable results and generalizes to early stopping in the sequence of sensing stages.
Submitted 19 October, 2023; originally announced October 2023.
Comments: Accepted in IEEE International Conference on Communications (ICC) 2023
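
As a rough illustration of the architecture sketched in this abstract, the toy module below uses an LSTM cell to absorb each new measurement into a hidden state, a DNN head to map that state to the next RIS configuration, and a final DNN to map the cell state to a position estimate. All dimensions, layer sizes, and the `fake_measure` stand-in for the pilot observation are assumptions for illustration, not the paper's design.

    import torch
    import torch.nn as nn

    class ActiveSensingRIS(nn.Module):
        """Toy LSTM-based active sensing loop; all sizes are illustrative."""

        def __init__(self, meas_dim=8, hidden_dim=128, num_ris_elements=64):
            super().__init__()
            self.cell = nn.LSTMCell(meas_dim, hidden_dim)
            # Hidden state -> RIS phase shifts for the next time frame
            # (a real design would map these to unit-modulus coefficients exp(j*phase)).
            self.ris_head = nn.Sequential(
                nn.Linear(hidden_dim, 256), nn.ReLU(),
                nn.Linear(256, num_ris_elements))
            # Cell state -> (x, y) position estimate after the final frame.
            self.loc_head = nn.Sequential(
                nn.Linear(hidden_dim, 256), nn.ReLU(), nn.Linear(256, 2))
            self.hidden_dim = hidden_dim
            self.num_ris_elements = num_ris_elements

        def forward(self, measure, num_frames, batch_size):
            h = torch.zeros(batch_size, self.hidden_dim)
            c = torch.zeros(batch_size, self.hidden_dim)
            phases = torch.zeros(batch_size, self.num_ris_elements)
            for _ in range(num_frames):
                y = measure(phases)          # received pilot under the current RIS setting
                h, c = self.cell(y, (h, c))  # update the information state
                phases = self.ris_head(h)    # codebook-free RIS design for the next frame
            return self.loc_head(c)          # final position estimate

    # Placeholder for the uplink pilot observation; any map from the RIS phases
    # to a measurement vector of size meas_dim would do here.
    def fake_measure(phases):
        return torch.randn(phases.shape[0], 8)

    model = ActiveSensingRIS()
    print(model(fake_measure, num_frames=5, batch_size=4).shape)  # torch.Size([4, 2])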
href="/search/eess?searchtype=author&query=Zhang%2C+C">Chao Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.05863v2-abstract-short" style="display: inline;"> Audio-visual large language models (LLM) have drawn significant attention, yet the fine-grained combination of both input streams is rather under-explored, which is challenging but necessary for LLMs to understand general video inputs. To this end, a fine-grained audio-visual joint representation (FAVOR) learning framework for multimodal LLMs is proposed in this paper, which extends a text-based L… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.05863v2-abstract-full').style.display = 'inline'; document.getElementById('2310.05863v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.05863v2-abstract-full" style="display: none;"> Audio-visual large language models (LLM) have drawn significant attention, yet the fine-grained combination of both input streams is rather under-explored, which is challenging but necessary for LLMs to understand general video inputs. To this end, a fine-grained audio-visual joint representation (FAVOR) learning framework for multimodal LLMs is proposed in this paper, which extends a text-based LLM to simultaneously perceive speech and audio events in the audio input stream and images or videos in the visual input stream, at the frame level. To fuse the audio and visual feature streams into joint representations and to align the joint space with the LLM input embedding space, we propose a causal Q-Former structure with a causal attention module to enhance the capture of causal relations of the audio-visual frames across time. An audio-visual evaluation benchmark (AVEB) is also proposed which comprises six representative single-modal tasks with five cross-modal tasks reflecting audio-visual co-reasoning abilities. While achieving competitive single-modal performance on audio, speech and image tasks in AVEB, FAVOR achieved over 20% accuracy improvements on the video question-answering task when fine-grained information or temporal causal reasoning is required. FAVOR, in addition, demonstrated remarkable video comprehension and reasoning abilities on tasks that are unprecedented by other multimodal LLMs. An interactive demo of FAVOR is available at https://github.com/BriansIDP/AudioVisualLLM.git, and the training code and model checkpoints will be released soon. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.05863v2-abstract-full').style.display = 'none'; document.getElementById('2310.05863v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. 

arXiv:2310.05021 [eess.SY]
Toward Intelligent Emergency Control for Large-scale Power Systems: Convergence of Learning, Physics, Computing and Control
Authors: Qiuhua Huang, Renke Huang, Tianzhixi Yin, Sohom Datta, Xueqing Sun, Jason Hou, Jie Tan, Wenhao Yu, Yuan Liu, Xinya Li, Bruce Palmer, Ang Li, Xinda Ke, Marianna Vaiman, Song Wang, Yousu Chen
Abstract: This paper addresses the pressing need for intelligent emergency control in large-scale power systems, which are experiencing significant transformations and are operating closer to their limits with more uncertainties. Learning-based control methods are promising and have shown effectiveness for intelligent power system control. However, when they are applied to large-scale power systems, there are multifaceted challenges such as scalability, adaptiveness, and security posed by the complex power system landscape, which demand comprehensive solutions.
The paper first proposes and instantiates a convergence framework for integrating power system physics, machine learning, advanced computing, and grid control to realize intelligent grid control at a large scale. Our developed methods and platform based on the convergence framework have been applied to a large (more than 3000 buses) Texas power system, and tested with 56000 scenarios. Our work achieved a 26% reduction in load shedding on average and outperformed existing rule-based control in 99.7% of the test scenarios. The results demonstrate the potential of the proposed convergence framework and DRL-based intelligent control for the future grid.
Submitted 8 October, 2023; originally announced October 2023.
Comments: submitted to PSCC 2024

arXiv:2310.03347 [eess.SY]
Razumikhin-type ISS Lyapunov function and small gain theorem for discrete time time-delay systems with application to a biased min-consensus protocol
Authors: Yuanqiu Mo, Wenwu Yu, Huazhou Hou, Soura Dasgupta
Abstract: This paper considers small gain theorems for the global asymptotic and exponential input-to-state stability of discrete-time time-delay systems using a Razumikhin-type Lyapunov function.
Among other things, unlike the existing literature, it provides both necessary and sufficient conditions for exponential input-to-state stability in terms of the Razumikhin-type Lyapunov function and the small gain theorem. Previous necessary and sufficient conditions were given in terms of the more computationally onerous Krasovskii-type Lyapunov functions. The result finds application in the robust stability analysis of a graph-based distributed algorithm, namely, the biased min-consensus protocol, which can be used to compute the length of the shortest path from each node to its nearest source in a graph. We consider the biased min-consensus protocol under perturbations that are common in communication networks, including noise, delay and asynchronous communication. By converting such a perturbed protocol into a discrete-time time-delay nonlinear system, we prove its exponential input-to-state stability under perturbations using our Razumikhin-type Lyapunov-based small gain theorem. Simulations are provided to verify the theoretical results.
Submitted 5 October, 2023; originally announced October 2023.
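
For context, one common form of the biased min-consensus iteration mentioned in this abstract is sketched below on a small weighted graph: source nodes are biased to hold zero, and every other node repeatedly takes the minimum of its neighbors' values plus the edge weights, so that under nominal (synchronous, delay-free) updates the values converge to the shortest-path distances to the nearest source. The example graph and variable names are illustrative, and the perturbations studied in the paper (noise, delay, asynchrony) are not modeled here.

    # Undirected weighted graph: node -> {neighbor: edge weight} (illustrative).
    graph = {
        0: {1: 1.0, 2: 4.0},
        1: {0: 1.0, 2: 2.0, 3: 6.0},
        2: {0: 4.0, 1: 2.0, 3: 3.0},
        3: {1: 6.0, 2: 3.0},
    }
    sources = {0}                                   # source nodes are biased to hold 0
    x = {i: 0.0 if i in sources else float("inf") for i in graph}

    for _ in range(len(graph)):                     # synchronous, delay-free iterations
        x = {i: 0.0 if i in sources
             else min(x[j] + w for j, w in nbrs.items())
             for i, nbrs in graph.items()}

    print(x)  # {0: 0.0, 1: 1.0, 2: 3.0, 3: 6.0}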

arXiv:2310.02086 [eess.SY]
Bearing-Based Target Entrapping Control of Multiple Uncertain Agents With Arbitrary Maneuvers
Authors: Haifan Su, Ziwen Yang, Shanying Zhu, Cailian Chen, Wenbin Yu
Abstract: This paper is concerned with bearing-based cooperative target entrapping control of multiple uncertain agents with arbitrary maneuvers, including shape deformation, rotations, scalings, etc. A leader-follower structure is used, where the leaders move along predesigned trajectories, and the followers are steered by an estimation-based control method integrating a distance estimator using bearing measurements and a stress matrix-based formation controller. Signum functions are used to compensate for the uncertainties so that the agents' accelerations can be piecewise continuous and bounded to track the desired dynamics. With proper design of the leaders' trajectories and the geometric configuration, an affine matrix is determined so that the persistently exciting conditions on the inter-agent relative bearings are satisfied, since the bearing rates are related to different weighted combinations of the affine matrix vectors. The asymptotic convergence of the estimation error and control error is proved using Filippov properties and cascaded system theory. A sufficient condition for inter-agent collision avoidance is also proposed. Finally, simulation results are given to validate the effectiveness of the method in both 2D and 3D cases.
Submitted 6 October, 2023; v1 submitted 3 October, 2023; originally announced October 2023.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 6 figures, the paper has been accepted by IFAC WC 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.13963">arXiv:2309.13963</a> <span> [<a href="https://arxiv.org/pdf/2309.13963">pdf</a>, <a href="https://arxiv.org/format/2309.13963">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Connecting Speech Encoder and Large Language Model for ASR </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Yu%2C+W">Wenyi Yu</a>, <a href="/search/eess?searchtype=author&query=Tang%2C+C">Changli Tang</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+G">Guangzhi Sun</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+X">Xianzhao Chen</a>, <a href="/search/eess?searchtype=author&query=Tan%2C+T">Tian Tan</a>, <a href="/search/eess?searchtype=author&query=Li%2C+W">Wei Li</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+L">Lu Lu</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+Z">Zejun Ma</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+C">Chao Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.13963v2-abstract-short" style="display: inline;"> The impressive capability and versatility of large language models (LLMs) have aroused increasing attention in automatic speech recognition (ASR), with several pioneering studies attempting to build integrated ASR models by connecting a speech encoder with an LLM. This paper presents a comparative study of three commonly used structures as connectors, including fully connected layers, multi-head c… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.13963v2-abstract-full').style.display = 'inline'; document.getElementById('2309.13963v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.13963v2-abstract-full" style="display: none;"> The impressive capability and versatility of large language models (LLMs) have aroused increasing attention in automatic speech recognition (ASR), with several pioneering studies attempting to build integrated ASR models by connecting a speech encoder with an LLM. This paper presents a comparative study of three commonly used structures as connectors, including fully connected layers, multi-head cross-attention, and Q-Former. Speech encoders from the Whisper model series as well as LLMs from the Vicuna model series with different model sizes were studied. Experiments were performed on the commonly used LibriSpeech, Common Voice, and GigaSpeech datasets, where the LLMs with Q-Formers demonstrated consistent and considerable word error rate (WER) reductions over LLMs with other connector structures. 
Q-Former-based LLMs can generalise well to out-of-domain datasets, where 12% relative WER reductions over the Whisper baseline ASR model were achieved on the Eval2000 test set without using any in-domain training data from Switchboard. Moreover, a novel segment-level Q-Former is proposed to enable LLMs to recognise speech segments with a duration exceeding the limitation of the encoders, which results in 17% relative WER reductions over other connector structures on 90-second-long speech data.
Submitted 26 September, 2023; v1 submitted 25 September, 2023; originally announced September 2023.
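
As a rough sketch of one of the connector structures compared here, the module below implements a Q-Former-style connector: a fixed set of trainable query vectors cross-attends to the speech-encoder output and is then projected into the LLM embedding space, so the LLM always receives a fixed-length sequence regardless of the utterance length. Layer counts, dimensions, and names are placeholders rather than the paper's implementation.

    import torch
    import torch.nn as nn

    class QFormerConnector(nn.Module):
        """Trainable queries + cross-attention: encoder states -> fixed-length LLM inputs."""

        def __init__(self, enc_dim=1024, llm_dim=4096, num_queries=32, num_heads=8):
            super().__init__()
            self.queries = nn.Parameter(torch.randn(num_queries, enc_dim) * 0.02)
            self.cross_attn = nn.MultiheadAttention(enc_dim, num_heads, batch_first=True)
            self.ffn = nn.Sequential(
                nn.Linear(enc_dim, 4 * enc_dim), nn.GELU(), nn.Linear(4 * enc_dim, enc_dim))
            self.norm1 = nn.LayerNorm(enc_dim)
            self.norm2 = nn.LayerNorm(enc_dim)
            self.proj = nn.Linear(enc_dim, llm_dim)   # into the LLM embedding space

        def forward(self, enc_states):
            # enc_states: (batch, num_frames, enc_dim) from a frozen speech encoder.
            q = self.queries.unsqueeze(0).expand(enc_states.size(0), -1, -1)
            attn_out, _ = self.cross_attn(q, enc_states, enc_states)
            q = self.norm1(q + attn_out)
            q = self.norm2(q + self.ffn(q))
            return self.proj(q)                        # (batch, num_queries, llm_dim)

    connector = QFormerConnector()
    speech_states = torch.randn(2, 1500, 1024)          # e.g. a Whisper-style encoder output
    print(connector(speech_states).shape)               # torch.Size([2, 32, 4096])

The fully connected and cross-attention connectors mentioned in the abstract differ mainly in whether the encoder frames are projected one-to-one or attended to by the LLM's own token stream; the fixed-query design above is what gives the Q-Former its length-independent output.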

arXiv:2309.11717 [eess.SP]
A class-weighted supervised contrastive learning long-tailed bearing fault diagnosis approach using quadratic neural network
Authors: Wei-En Yu, Jinwei Sun, Shiping Zhang, Xiaoge Zhang, Jing-Xiao Liao
Abstract: Deep learning has achieved remarkable success in bearing fault diagnosis. However, its performance often deteriorates when dealing with highly imbalanced or long-tailed data, while such cases are prevalent in industrial settings because a fault is a rare event that occurs with an extremely low probability. Conventional data augmentation methods face fundamental limitations due to the scarcity of samples pertaining to the minority class. In this paper, we propose a supervised contrastive learning approach with a class-aware loss function to enhance the feature extraction capability of neural networks for fault diagnosis. The developed class-weighted contrastive learning quadratic network (CCQNet) consists of a quadratic convolutional residual network backbone, a contrastive learning branch utilizing a class-weighted contrastive loss, and a classifier branch employing a logit-adjusted cross-entropy loss. By utilizing the class-weighted contrastive loss and the logit-adjusted cross-entropy loss, our approach encourages equidistant representation of class features, thereby inducing equal attention on all the classes. We further analyze the superior feature extraction ability of the quadratic network by establishing the connection between quadratic neurons and autocorrelation in signal processing. Experimental results on public and proprietary datasets validate the effectiveness of CCQNet, and computational results reveal that CCQNet substantially outperforms SOTA methods in handling extremely imbalanced data.
Submitted 20 September, 2023; originally announced September 2023.
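
The two loss ingredients named in this abstract have fairly standard forms, sketched below: a logit-adjusted cross-entropy that shifts each logit by the log of its class prior, and a supervised contrastive loss whose per-anchor terms are scaled by a class weight (for example, inverse class frequency). The temperature, the weighting scheme, and the toy batch are assumptions for illustration, not the exact CCQNet losses.

    import torch
    import torch.nn.functional as F

    def logit_adjusted_ce(logits, labels, class_priors, tau=1.0):
        """Cross-entropy on logits shifted by tau * log(class prior)."""
        adjusted = logits + tau * torch.log(class_priors).unsqueeze(0)
        return F.cross_entropy(adjusted, labels)

    def class_weighted_supcon(features, labels, class_weights, temperature=0.1):
        """Supervised contrastive loss with a per-class weight on each anchor."""
        z = F.normalize(features, dim=1)
        sim = (z @ z.t()) / temperature
        not_self = ~torch.eye(len(z), dtype=torch.bool)
        pos_mask = (labels.unsqueeze(0) == labels.unsqueeze(1)) & not_self

        sim = sim.masked_fill(~not_self, float("-inf"))           # drop self-pairs
        log_prob = sim - torch.logsumexp(sim, dim=1, keepdim=True)

        pos_counts = pos_mask.sum(1).clamp(min=1)
        loss_i = -log_prob.masked_fill(~pos_mask, 0.0).sum(1) / pos_counts
        weights = class_weights[labels]                           # e.g. inverse frequency
        valid = pos_mask.any(1)                                   # anchors with a positive
        return (weights * loss_i)[valid].mean()

    # Toy long-tailed batch; values are illustrative only.
    feats = torch.randn(8, 16)
    labels = torch.tensor([0, 0, 0, 0, 0, 1, 1, 2])
    priors = torch.tensor([0.7, 0.2, 0.1])
    weights = 1.0 / (priors * priors.numel())
    print(logit_adjusted_ce(torch.randn(8, 3), labels, priors).item())
    print(class_weighted_supcon(feats, labels, weights).item())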

arXiv:2309.09575 [eess.SP, cs.IT]
Deep Learning for Near-Field XL-MIMO Transceiver Design: Principles and Techniques
Authors: Wentao Yu, Yifan Ma, Hengtao He, Shenghui Song, Jun Zhang, Khaled B. Letaief
Abstract: Massive multiple-input multiple-output (MIMO) has been a critical enabling technology in 5th generation (5G) wireless networks. With the advent of 6G, a natural evolution is to employ even more antennas, potentially an order of magnitude more, to meet the ever-increasing demand for spectral efficiency. This is beyond a mere quantitative scale-up. The enlarged array aperture brings a paradigm shift towards near-field communications, departing from traditional far-field approaches. However, designing advanced transceiver algorithms for near-field systems is extremely challenging because of the enormous system scale, the complicated channel characteristics, and the uncertainties in the propagation environments. Hence, it is important to develop scalable, low-complexity, and robust algorithms that can efficiently characterize and leverage the properties of the near-field channel. In this article, we discuss the principles and advocate two general frameworks to design deep learning-based near-field transceivers covering both iterative and non-iterative algorithms. Case studies on channel estimation and beam focusing are presented to provide a hands-on tutorial. Finally, we discuss open issues and shed light on future directions.
Submitted 12 August, 2024; v1 submitted 18 September, 2023; originally announced September 2023.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 3 figures, 2 tables, accepted by IEEE Communications Magazine, Special Issue on Near-Field MIMO Technologies Towards 6G</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.01802">arXiv:2308.01802</a> <span> [<a href="https://arxiv.org/pdf/2308.01802">pdf</a>, <a href="https://arxiv.org/ps/2308.01802">ps</a>, <a href="https://arxiv.org/format/2308.01802">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Multi-Carrier Modulation: An Evolution from Time-Frequency Domain to Delay-Doppler Domain </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Lin%2C+H">Hai Lin</a>, <a href="/search/eess?searchtype=author&query=Yuan%2C+J">Jinhong Yuan</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+W">Wei Yu</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+J">Jingxian Wu</a>, <a href="/search/eess?searchtype=author&query=Hanzo%2C+L">Lajos Hanzo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.01802v1-abstract-short" style="display: inline;"> The recently proposed orthogonal delay-Doppler division multiplexing (ODDM) modulation, which is based on the new delay-Doppler (DD) domain orthogonal pulse (DDOP), is studied. A substantial benefit of the DDOP-based ODDM or general delay-Doppler domain multi-carrier (DDMC) modulation is that it achieves orthogonality with respect to the fine time and frequency resolutions of the DD domain. We fir… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.01802v1-abstract-full').style.display = 'inline'; document.getElementById('2308.01802v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.01802v1-abstract-full" style="display: none;"> The recently proposed orthogonal delay-Doppler division multiplexing (ODDM) modulation, which is based on the new delay-Doppler (DD) domain orthogonal pulse (DDOP), is studied. A substantial benefit of the DDOP-based ODDM or general delay-Doppler domain multi-carrier (DDMC) modulation is that it achieves orthogonality with respect to the fine time and frequency resolutions of the DD domain. We first revisit the family of wireless channel models conceived for linear time-varying (LTV) channels, and then review the conventional multi-carrier (MC) modulation schemes and their design guidelines for both linear time-invariant (LTI) and LTV channels. Then we discuss the time-varying property of the LTV channels' DD domain impulse response and propose an impulse function based transmission strategy for equivalent sampled DD domain (ESDD) channels. Next, we take an in-depth look into the DDOP and the corresponding ODDM modulation to unveil its unique input-output relation for transmission over ESDD channels. 
Then, we point out that the conventional MC modulation design guidelines based on Weyl-Heisenberg (WH) frame theory can be relaxed without compromising orthogonality or violating WH frame theory. More specifically, for a communication system with a given bandwidth and duration, MC modulation signals can be designed based on a WH subset associated with sufficient (bi)orthogonality, which governs the (bi)orthogonality of the MC signal within that bandwidth and duration. This novel design guideline could open up opportunities for developing future waveforms required by new applications, such as communication systems subject to high delay and/or Doppler shifts, as well as integrated sensing and communications.
Submitted 3 August, 2023; originally announced August 2023.
Comments: This paper has been submitted to the IEEE for possible publication. The supplementary material of this work will be posted at https://www.omu.ac.jp/eng/ees-sic/oddm/

arXiv:2307.16865 [cs.CV, eess.IV]
Universal Adversarial Defense in Remote Sensing Based on Pre-trained Denoising Diffusion Models
Authors: Weikang Yu, Yonghao Xu, Pedram Ghamisi
Abstract: Deep neural networks (DNNs) have risen to prominence as key solutions in numerous AI applications for earth observation (AI4EO). However, their susceptibility to adversarial examples poses a critical challenge, compromising the reliability of AI4EO algorithms.
This paper presents a novel Universal Adversarial Defense approach in Remote Sensing Imagery (UAD-RS), leveraging pre-trained diffusion models to protect DNNs against universal adversarial examples exhibiting heterogeneous patterns. Specifically, a universal adversarial purification framework is developed utilizing pre-trained diffusion models to mitigate adversarial perturbations through the introduction of Gaussian noise and the subsequent purification of the perturbations from adversarial examples. Additionally, an Adaptive Noise Level Selection (ANLS) mechanism is introduced to determine the optimal noise level for the purification framework with a task-guided Frechet Inception Distance (FID) ranking strategy, thereby enhancing purification performance. Consequently, only a single pre-trained diffusion model is required for purifying universal adversarial samples with heterogeneous patterns across each dataset, significantly reducing training efforts for multiple attack settings while maintaining high performance without prior knowledge of the adversarial perturbations. Experimental results on four heterogeneous RS datasets, focusing on scene classification and semantic segmentation, demonstrate that UAD-RS outperforms state-of-the-art adversarial purification approaches, providing universal defense against seven commonly encountered adversarial perturbations. Codes and the pre-trained models are available online (https://github.com/EricYu97/UAD-RS).
Submitted 27 May, 2024; v1 submitted 31 July, 2023; originally announced July 2023.
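
The purification step described in this abstract follows the usual diffusion recipe: diffuse the (possibly adversarial) input to a chosen noise level with the DDPM forward process, then use a pre-trained noise predictor to estimate the clean image. The sketch below uses a placeholder `eps_model` and a one-step reconstruction instead of a full reverse chain, and the schedule and noise level `t_star` are illustrative assumptions rather than the paper's ANLS-selected values.

    import torch

    def purify(x_adv, eps_model, t_star=100, num_steps=1000):
        """Diffuse an input to noise level t_star, then estimate the clean image."""
        # Linear beta schedule as in DDPM; alpha_bar_t = prod_{s<=t} (1 - beta_s).
        betas = torch.linspace(1e-4, 0.02, num_steps)
        alpha_bar = torch.cumprod(1.0 - betas, dim=0)[t_star]

        # Forward diffusion q(x_t | x_0): Gaussian noise washes out the adversarial
        # perturbation while keeping the image content.
        noise = torch.randn_like(x_adv)
        x_t = alpha_bar.sqrt() * x_adv + (1.0 - alpha_bar).sqrt() * noise

        # One-step estimate of x_0 from the predicted noise (a full implementation
        # would run the reverse chain from t_star down to 0 instead).
        t = torch.full((x_adv.shape[0],), t_star, dtype=torch.long)
        eps_hat = eps_model(x_t, t)
        x0_hat = (x_t - (1.0 - alpha_bar).sqrt() * eps_hat) / alpha_bar.sqrt()
        return x0_hat.clamp(-1.0, 1.0)

    # Placeholder noise predictor, only to make the sketch runnable end to end.
    dummy_eps_model = lambda x_t, t: torch.zeros_like(x_t)
    print(purify(torch.rand(2, 3, 64, 64) * 2 - 1, dummy_eps_model).shape)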

arXiv:2307.04327 [cs.RO, eess.SY]
Legal Decision-making for Highway Automated Driving
Authors: Xiaohan Ma, Wenhao Yu, Chengxiang Zhao, Changjun Wang, Wenhui Zhou, Guangming Zhao, Mingyue Ma, Weida Wang, Lin Yang, Rui Mu, Hong Wang, Jun Li
Abstract: Compliance with traffic laws is a fundamental requirement for human drivers on the road, and autonomous vehicles must adhere to traffic laws as well. However, current autonomous vehicles prioritize safety and collision avoidance primarily in their decision-making and planning, which can lead to misunderstandings and distrust from human drivers and may even result in accidents in mixed traffic flow. Therefore, ensuring compliance of the autonomous driving decision-making system is essential for the safety of autonomous driving and for promoting the widespread adoption of autonomous driving technology. To this end, the paper proposes a trigger-based layered compliance decision-making framework. This framework utilizes the decision intent at the highest level as a signal to activate an online violation monitor that identifies the type of violation committed by the vehicle. Then, a four-layer architecture for compliance decision-making is employed to generate compliant trajectories.
Using this system, autonomous vehicles can detect and correct potential violations in real time, thereby enhancing safety and building public confidence in autonomous driving technology. Finally, the proposed method is evaluated on the DJI AD4CHE highway dataset under four typical highway scenarios: speed limit, following distance, overtaking, and lane changing. The results indicate that the proposed method increases the vehicle's overall compliance rate from 13.85% to 84.46%, while reducing the proportion of active violations to 0%, demonstrating its effectiveness.
Submitted 9 July, 2023; originally announced July 2023.
Comments: 14 pages, 17 figures
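
Purely as an illustration of what a trigger-based online violation monitor might look like, the toy function below maps a decision intent plus a simple vehicle state to one of the violation types mentioned in the evaluation (speed limit, following distance, overtaking, lane changing). The thresholds, state fields, and rule logic are invented for this sketch and are not taken from the paper.

    # Illustrative only: thresholds and rules are placeholders, not the paper's monitor.
    SPEED_LIMIT_MPS = 33.3        # roughly a 120 km/h highway limit
    MIN_TIME_HEADWAY_S = 2.0      # simple following-distance rule

    def violation_monitor(intent, state):
        """Map a decision intent plus vehicle state to a potential violation type."""
        if state["speed"] > SPEED_LIMIT_MPS:
            return "speed_limit"
        if state["gap"] / max(state["speed"], 1e-3) < MIN_TIME_HEADWAY_S:
            return "following_distance"
        if intent == "lane_change" and not state["lane_change_permitted"]:
            return "lane_changing"
        if intent == "overtake" and state["right_side_overtake"]:
            return "overtaking"
        return None

    state = {"speed": 30.0, "gap": 70.0,
             "lane_change_permitted": True, "right_side_overtake": False}
    print(violation_monitor("lane_change", state))  # None, so no compliance trigger fires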

arXiv:2307.03394 [eess.IV, cs.MM]
Dual Inverse Degradation Network for Real-World SDRTV-to-HDRTV Conversion
Authors: Kepeng Xu, Li Xu, Gang He, Xianyun Wu, Zhiqiang Zhang, Wenxin Yu, Yunsong Li
Abstract: In this study, we address the emerging necessity of converting Standard Dynamic Range Television (SDRTV) content into High Dynamic Range Television (HDRTV) in light of the limited amount of native HDRTV content. A principal technical challenge in this conversion is the exacerbation of coding artifacts inherent in SDRTV, which detrimentally impacts the quality of the resulting HDRTV. To address this issue, our method introduces a novel approach that conceptualizes the SDRTV-to-HDRTV conversion as a composite task involving dual degradation restoration: inverse tone mapping in conjunction with video restoration. We propose the Dual Inversion Downgraded SDRTV to HDRTV Network (DIDNet), which can accurately perform inverse tone mapping while preventing encoding artifacts from being amplified, thereby significantly improving visual quality. DIDNet integrates an intermediate auxiliary loss function to effectively separate the dual degradation restoration tasks and enable efficient learning of both artifact reduction and inverse tone mapping during end-to-end training. Additionally, DIDNet introduces a spatio-temporal feature alignment module for video frame fusion, which augments texture quality and reduces artifacts. The architecture further includes a dual-modulation convolution mechanism for optimized inverse tone mapping. Recognizing the richer texture and high-frequency information in HDRTV compared to SDRTV, we further introduce a wavelet attention module to enhance frequency features. Our approach demonstrates marked superiority over existing state-of-the-art techniques in terms of quantitative performance and visual quality.
Submitted 23 October, 2024; v1 submitted 7 July, 2023; originally announced July 2023.