Search | arXiv e-print repository
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 355 results for author: <span class="mathjax">Li, G</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/eess" aria-role="search"> Searching in archive <strong>eess</strong>. <a href="/search/?searchtype=author&query=Li%2C+G">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Li, G"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Li%2C+G&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Li, G"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Li%2C+G&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Li%2C+G&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Li%2C+G&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Li%2C+G&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Li%2C+G&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Li%2C+G&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.17168">arXiv:2502.17168</a> <span> [<a href="https://arxiv.org/pdf/2502.17168">pdf</a>, <a href="https://arxiv.org/format/2502.17168">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> SpikACom: A Neuromorphic Computing Framework for Green Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Liu%2C+Y">Yanzhen Liu</a>, <a href="/search/eess?searchtype=author&query=Qin%2C+Z">Zhijin Qin</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+Y">Yongxu Zhu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G+Y">Geoffrey Ye Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.17168v1-abstract-short" style="display: inline;"> The ever-growing power consumption of wireless communication systems necessitates more energy-efficient algorithms. This paper introduces SpikACom ({Spik}ing {A}daptive {Com}munication), a neuromorphic computing-based framework for power-intensive wireless communication tasks. SpikACom leverages brain-inspired spiking neural networks (SNNs) for efficient signal processing. 
2. arXiv:2502.16121 [pdf, other] eess.SY (Systems and Control); cs.RO; eess.SP
From Target Tracking to Targeting Track -- Part II: Regularized Polynomial Trajectory Optimization
Authors: Tiancheng Li, Yan Song, Guchong Li, Hao Li
Abstract: Target tracking entails the estimation of the evolution of the target state over time, namely the target trajectory. Different from the classical state space model, our series of studies, including this paper, models the collection of target states as a stochastic process (SP) that is further decomposed into a deterministic part, which represents the trend of the trajectory, and a residual SP representing the residual fitting error. Subsequently, the tracking problem is formulated as a learning problem regarding the trajectory SP, a key part of which is to estimate a trajectory function of time (T-FoT) best fitting the measurements in time series. For this purpose, we consider the polynomial T-FoT and address the regularized polynomial T-FoT optimization employing two distinct regularization strategies seeking a trade-off between accuracy and simplicity. One limits the order of the polynomial, and the best choice is then determined by grid searching in a narrow, bounded range; the other adopts $\ell_0$ norm regularization, for which a hybrid Newton solver is employed. Simulation results obtained in both single and multiple maneuvering target scenarios demonstrate the effectiveness of our approaches.
Submitted 22 February, 2025; originally announced February 2025.
Comments: Part II of a series of companion papers; 11 pages, 10 figures
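To make the first strategy concrete, here is a toy sketch of fitting a polynomial trajectory function of time and grid-searching its order under a simple complexity penalty; the data, penalty weight, and use of numpy's least-squares polyfit are illustrative assumptions, not the paper's solver.

```python
import numpy as np

rng = np.random.default_rng(1)
t = np.linspace(0, 10, 60)                           # measurement time stamps
z = 0.5 * t**2 - 2 * t + rng.normal(0, 1.5, t.size)  # noisy 1-D positions

def fit_t_fot(t, z, max_order=6, lam=10.0):
    """Grid-search the polynomial order, trading fit error against simplicity."""
    best = None
    for k in range(1, max_order + 1):
        coef = np.polyfit(t, z, k)                   # least-squares fit of order k
        rss = np.sum((np.polyval(coef, t) - z) ** 2)
        score = rss + lam * k                        # simple complexity penalty
        if best is None or score < best[0]:
            best = (score, k, coef)
    return best[1], best[2]

order, coef = fit_t_fot(t, z)
print("selected order:", order)  # typically 2 for this quadratic toy trend
```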
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Part II of a series of companion papers; 11 pages, 10 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12735">arXiv:2502.12735</a> <span> [<a href="https://arxiv.org/pdf/2502.12735">pdf</a>, <a href="https://arxiv.org/format/2502.12735">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Task-Oriented Semantic Communication for Stereo-Vision 3D Object Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Cao%2C+Z">Zijian Cao</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+H">Hua Zhang</a>, <a href="/search/eess?searchtype=author&query=Liang%2C+L">Le Liang</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+H">Haotian Wang</a>, <a href="/search/eess?searchtype=author&query=Jin%2C+S">Shi Jin</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G+Y">Geoffrey Ye Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12735v1-abstract-short" style="display: inline;"> With the development of computer vision, 3D object detection has become increasingly important in many real-world applications. Limited by the computing power of sensor-side hardware, the detection task is sometimes deployed on remote computing devices or the cloud to execute complex algorithms, which brings massive data transmission overhead. In response, this paper proposes an optical flow-drive… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12735v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12735v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12735v1-abstract-full" style="display: none;"> With the development of computer vision, 3D object detection has become increasingly important in many real-world applications. Limited by the computing power of sensor-side hardware, the detection task is sometimes deployed on remote computing devices or the cloud to execute complex algorithms, which brings massive data transmission overhead. In response, this paper proposes an optical flow-driven semantic communication framework for the stereo-vision 3D object detection task. The proposed framework fully exploits the dependence of stereo-vision 3D detection on semantic information in images and prioritizes the transmission of this semantic information to reduce total transmission data sizes while ensuring the detection accuracy. Specifically, we develop an optical flow-driven module to jointly extract and recover semantics from the left and right images to reduce the loss of the left-right photometric alignment semantic information and improve the accuracy of depth inference. Then, we design a 2D semantic extraction module to identify and extract semantic meaning around the objects to enhance the transmission of semantic information in the key areas. 
Finally, a fusion network is used to fuse the recovered semantics and reconstruct the stereo-vision images for 3D detection. Simulation results show that the proposed method improves detection accuracy by nearly 70% and outperforms the traditional method, especially in the low signal-to-noise ratio regime.
Submitted 18 February, 2025; originally announced February 2025.

4. arXiv:2502.05842 [pdf] eess.SY (Systems and Control)
A Grid-Forming HVDC Series Tapping Converter Using Extended Techniques of Flex-LCC
Authors: Qianhao Sun, Ruofan Li, Jichen Wang, Mingchao Xia, Qifang Chen, Meiqi Fan, Gen Li, Xuebo Qiao
Abstract: This paper discusses an extension technology for the previously proposed Flexible Line-Commutated Converter (Flex-LCC) [1]. The proposed extension involves modifying the arm internal-electromotive-force control, redesigning the main-circuit parameters, and integrating a low-power coordination strategy. As a result, the Flex-LCC transforms from a grid-forming (GFM) voltage source converter (VSC) based on series-connected LCC and FBMMC into a novel GFM HVDC series tapping converter, referred to as the Extended Flex-LCC (EFLCC).
The EFLCC provides dc characteristics resembling those of current source converters (CSCs) and ac characteristics resembling those of GFM VSCs. This makes it easier to integrate relatively small renewable energy sources (RESs) that operate in islanded or weak-grid-supported conditions with an existing LCC-HVDC. Meanwhile, the EFLCC distinguishes itself by requiring fewer fully controlled switches and less energy storage, resulting in lower losses and costs compared to the FBMMC HVDC series tap solution. In particular, the reduced capacity requirement and the wide allowable range of valve-side ac voltages in the FBMMC part facilitate the matching of current-carrying capacities between fully controlled switches and thyristors. The application scenario, system-level analysis, implementation, converter-level operation, and comparison of the EFLCC are presented in detail in this paper. The theoretical analysis is confirmed by experimental and simulation results.
Submitted 9 February, 2025; originally announced February 2025.

5. arXiv:2502.04912 [pdf, other] eess.SY (Systems and Control)
Joint Beamforming Design for Integrated Sensing and Communication Systems with Hybrid-Colluding Eavesdroppers
Authors: Meiding Liu, Zhengchun Zhou, Qiao Shi, Guyue Li, Zilong Liu, Pingzhi Fan, Inkyu Lee
Abstract: In this paper, we consider the physical layer security (PLS) problem for integrated sensing and communication (ISAC) systems in the presence of hybrid-colluding eavesdroppers, where an active eavesdropper (AE) and a passive eavesdropper (PE) collude to intercept the confidential information.
To ensure sensing accuracy while preventing eavesdropping, a base station transmits a signal consisting of information symbols and a sensing waveform, where the sensing waveform can also serve as artificial noise to interfere with the eavesdroppers. Under this setup, we propose an alternating optimization-based two-stage scheme (AO-TSS) for improving the sensing and communication performance. In the first stage, based on the assumptions that perfect channel state information (CSI) of the AE and statistical CSI of the PE are known, the communication and sensing beamforming problem is formulated with the objective of minimizing the weighted sum of the beampattern-matching mean squared error (MSE) and the cross-correlation, subject to the secure transmission constraint. To tackle the non-convexity, we propose a semi-definite relaxation (SDR) algorithm and a reduced-complexity zero-forcing (ZF) algorithm. The scenarios are then extended to more general cases with imperfect AE CSI and unknown PE CSI. To further improve the communication performance, a second-stage problem is developed to optimize the secrecy rate threshold under the radar performance constraint. Finally, numerical results demonstrate the superiority of the proposed scheme in terms of sensing and secure communication.
Submitted 7 February, 2025; originally announced February 2025.
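Semi-definite relaxation in this kind of design typically lifts the beamformer into a positive semidefinite covariance matrix so that the beampattern becomes linear in the variable. The sketch below shows only the classical beampattern-matching core under a power budget, omitting the paper's secrecy and cross-correlation terms; the array size, desired pattern, and cvxpy formulation are assumptions.

```python
import numpy as np
import cvxpy as cp

N = 8                                            # BS antennas (hypothetical)
grid = np.deg2rad(np.arange(-90, 91, 3))         # angular grid
steer = np.exp(1j * np.pi * np.outer(np.arange(N), np.sin(grid)))
desired = (np.abs(np.rad2deg(grid)) <= 15).astype(float)  # mainlobe mask

R = cp.Variable((N, N), hermitian=True)          # lifted transmit covariance
alpha = cp.Variable(nonneg=True)                 # pattern scaling
# Beampattern a(theta)^H R a(theta) = trace(R a a^H), affine in R.
pattern = cp.hstack([
    cp.real(cp.trace(R @ np.outer(steer[:, i], steer[:, i].conj())))
    for i in range(grid.size)
])
problem = cp.Problem(
    cp.Minimize(cp.sum_squares(pattern - alpha * desired)),  # matching MSE
    [R >> 0, cp.real(cp.trace(R)) == 1.0],       # PSD relaxation + power budget
)
problem.solve()
print("beampattern-matching MSE:", problem.value)
```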
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02385">arXiv:2502.02385</a> <span> [<a href="https://arxiv.org/pdf/2502.02385">pdf</a>, <a href="https://arxiv.org/format/2502.02385">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Achieving Hiding and Smart Anti-Jamming Communication: A Parallel DRL Approach against Moving Reactive Jammer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yangyang Li</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+Y">Yuhua Xu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+W">Wen Li</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G">Guoxin Li</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+Z">Zhibing Feng</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+S">Songyi Liu</a>, <a href="/search/eess?searchtype=author&query=Du%2C+J">Jiatao Du</a>, <a href="/search/eess?searchtype=author&query=Li%2C+X">Xinran Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02385v1-abstract-short" style="display: inline;"> This paper addresses the challenge of anti-jamming in moving reactive jamming scenarios. The moving reactive jammer initiates high-power tracking jamming upon detecting any transmission activity, and when unable to detect a signal, resorts to indiscriminate jamming. This presents dual imperatives: maintaining hiding to avoid the jammer's detection and simultaneously evading indiscriminate jamming.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02385v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02385v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02385v1-abstract-full" style="display: none;"> This paper addresses the challenge of anti-jamming in moving reactive jamming scenarios. The moving reactive jammer initiates high-power tracking jamming upon detecting any transmission activity, and when unable to detect a signal, resorts to indiscriminate jamming. This presents dual imperatives: maintaining hiding to avoid the jammer's detection and simultaneously evading indiscriminate jamming. Spread spectrum techniques effectively reduce transmitting power to elude detection but fall short in countering indiscriminate jamming. Conversely, changing communication frequencies can help evade indiscriminate jamming but makes the transmission vulnerable to tracking jamming without spread spectrum techniques to remain hidden. Current methodologies struggle with the complexity of simultaneously optimizing these two requirements due to the expansive joint action spaces and the dynamics of moving reactive jammers. To address these challenges, we propose a parallelized deep reinforcement learning (DRL) strategy. The approach includes a parallelized network architecture designed to decompose the action space. 
7. arXiv:2501.17776 [pdf, ps, other] eess.SP (Signal Processing)
Low-Complexity Multi-Target Detection in ELAA ISAC
Authors: Diluka Galappaththige, Shayan Zargari, Chintha Tellambura, Geoffrey Ye Li
Abstract: Multi-target detection and communication with extremely large-scale antenna arrays (ELAAs) operating at high frequencies necessitate generating multiple beams. However, conventional algorithms are slow and computationally intensive; for instance, simulating a 200-antenna system can take over two weeks, and the time complexity grows exponentially with the number of antennas. Thus, this letter explores an ultra-low-complexity solution for a multi-user, multi-target integrated sensing and communication (ISAC) system equipped with an ELAA base station (BS). It maximizes the communication sum rate while meeting sensing beampattern gain targets and transmit power constraints. As this problem is non-convex, a Riemannian stochastic gradient descent-based augmented Lagrangian manifold optimization (SGALM) algorithm is developed, which searches on a manifold to ensure constraint compliance. The algorithm achieves ultra-low complexity and superior runtime performance compared to conventional algorithms. For example, it is 56 times faster than the standard benchmark for 257 BS antennas.
Submitted 29 January, 2025; originally announced January 2025.
Comments: 5 pages, 5 figures, Letter
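Manifold search keeps iterates feasible by construction: take a gradient step in the tangent space, then retract onto the manifold. The sketch below does this on the unit-modulus (constant-envelope) manifold for a single-user beamforming gain, a far simpler setting than the paper's augmented Lagrangian problem; the objective, manifold, and step size are illustrative assumptions.

```python
import numpy as np

rng = np.random.default_rng(3)
N = 64                                            # antennas (illustrative)
h = rng.normal(size=N) + 1j * rng.normal(size=N)  # toy channel

def retract(w):
    """Retract onto the unit-modulus manifold: |w_i| = 1 for every element."""
    return w / np.abs(w)

w = retract(rng.normal(size=N) + 1j * rng.normal(size=N))
for _ in range(500):
    g = -h * np.vdot(h, w)              # Euclidean gradient of -|h^H w|^2
    rg = g - np.real(g * w.conj()) * w  # project onto the tangent space
    w = retract(w - 0.005 * rg)         # descend, then retract (stay feasible)

# Ratio of achieved to maximum possible gain; approaches 1 at the optimum.
print(np.abs(np.vdot(h, w)) / np.sum(np.abs(h)))
```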
8. arXiv:2501.15264 [pdf] eess.SP (Signal Processing)
Fusion of Millimeter-wave Radar and Pulse Oximeter Data for Low-burden Diagnosis of Obstructive Sleep Apnea-Hypopnea Syndrome
Authors: Wei Wang, Zhaoxi Chen, Wenyu Zhang, Zetao Wang, Xiang Zhao, Chenyang Li, Jian Guan, Shankai Yin, Gang Li
Abstract: Objective: The aim of this study is to develop a novel method for improved diagnosis of obstructive sleep apnea-hypopnea syndrome (OSAHS) in clinical or home settings, with the focus on achieving diagnostic performance comparable to the gold-standard polysomnography (PSG) with significantly reduced monitoring burden. Methods: We propose a method using a millimeter-wave radar and a pulse oximeter for OSAHS diagnosis (ROSA). It contains a sleep apnea-hypopnea event (SAE) detection network, which directly predicts the temporal localization of SAEs, and a sleep staging network, which predicts the sleep stages throughout the night, both based on radar signals. It also fuses oxygen saturation (SpO2) information from the pulse oximeter to adjust the score of SAEs detected by radar. Results: Experimental results on a real-world dataset (>800 hours of overnight recordings, 100 subjects) demonstrated high agreement (ICC=0.9870) on the apnea-hypopnea index (AHI) between ROSA and PSG. ROSA also exhibited excellent diagnostic performance, exceeding 90% accuracy across AHI diagnostic thresholds of 5, 15 and 30 events/h. Conclusion: ROSA improves diagnostic accuracy by fusing millimeter-wave radar and pulse oximeter data. It provides a reliable and low-burden solution for OSAHS diagnosis. Significance: ROSA addresses the limitations of high complexity and monitoring burden associated with traditional PSG. The high accuracy and low burden of ROSA show its potential to improve the accessibility of OSAHS diagnosis among the population.
Submitted 25 January, 2025; originally announced January 2025.
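The AHI referenced above is simply the number of apnea-hypopnea events per hour of sleep, and the 5/15/30 events/h cut-offs are the usual clinical severity grades. A tiny helper makes the grading concrete; the event count and sleep time below are made up.

```python
def ahi(n_events, sleep_hours):
    """Apnea-hypopnea index: events per hour of sleep."""
    return n_events / sleep_hours

def osahs_severity(index):
    """Common clinical grading at the 5/15/30 events/h thresholds."""
    if index < 5:
        return "normal"
    if index < 15:
        return "mild"
    if index < 30:
        return "moderate"
    return "severe"

# Toy example: 96 detected events over 7.5 hours of sleep.
idx = ahi(96, 7.5)
print(round(idx, 1), osahs_severity(idx))  # 12.8 mild
```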
9. arXiv:2501.13306 [pdf, other] cs.SD (Sound); cs.CL; eess.AS
OSUM: Advancing Open Speech Understanding Models with Limited Resources in Academia
Authors: Xuelong Geng, Kun Wei, Qijie Shao, Shuiyun Liu, Zhennan Lin, Zhixian Zhao, Guojian Li, Wenjie Tian, Peikun Chen, Yangze Li, Pengcheng Guo, Mingchen Shao, Shuiyuan Wang, Yuang Cao, Chengyou Wang, Tianyi Xu, Yuhang Dai, Xinfa Zhu, Yue Li, Li Zhang, Lei Xie
href="/search/eess?searchtype=author&query=Xu%2C+T">Tianyi Xu</a>, <a href="/search/eess?searchtype=author&query=Dai%2C+Y">Yuhang Dai</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+X">Xinfa Zhu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yue Li</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+L">Li Zhang</a>, <a href="/search/eess?searchtype=author&query=Xie%2C+L">Lei Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.13306v2-abstract-short" style="display: inline;"> Large Language Models (LLMs) have made significant progress in various downstream tasks, inspiring the development of Speech Understanding Language Models (SULMs) to enable comprehensive speech-based interactions. However, most advanced SULMs are developed by the industry, leveraging large-scale datasets and computational resources that are not readily available to the academic community. Moreover… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13306v2-abstract-full').style.display = 'inline'; document.getElementById('2501.13306v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.13306v2-abstract-full" style="display: none;"> Large Language Models (LLMs) have made significant progress in various downstream tasks, inspiring the development of Speech Understanding Language Models (SULMs) to enable comprehensive speech-based interactions. However, most advanced SULMs are developed by the industry, leveraging large-scale datasets and computational resources that are not readily available to the academic community. Moreover, the lack of transparency in training details creates additional barriers to further innovation. In this study, we present OSUM, an Open Speech Understanding Model designed to explore the potential of training SLUMs under constrained academic resources. The OSUM model combines a Whisper encoder with a Qwen2 LLM and supports a wide range of speech tasks, including speech recognition (ASR), speech recognition with timestamps (SRWT), vocal event detection (VED), speech emotion recognition (SER), speaking style recognition (SSR), speaker gender classification (SGC), speaker age prediction (SAP), and speech-to-text chat (STTC). By employing an ASR+X training strategy, OSUM achieves efficient and stable multi-task training by simultaneously optimizing ASR alongside target tasks. Beyond delivering strong performance, OSUM emphasizes transparency by providing openly available data preparation and training methodologies, offering valuable insights and practical guidance for the academic community. By doing so, we aim to accelerate research and innovation in advanced SULM technologies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13306v2-abstract-full').style.display = 'none'; document.getElementById('2501.13306v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">OSUM Technical Report v2. The experimental results reported herein differ from those in v1 because of adding new data and training in more steps</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.07041">arXiv:2501.07041</a> <span> [<a href="https://arxiv.org/pdf/2501.07041">pdf</a>, <a href="https://arxiv.org/format/2501.07041">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Beam Structured Turbo Receiver for HF Skywave Massive MIMO </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Song%2C+L">Linfeng Song</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+D">Ding Shi</a>, <a href="/search/eess?searchtype=author&query=Gao%2C+X">Xiqi Gao</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G+Y">Geoffrey Ye Li</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xiang-Gen Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.07041v1-abstract-short" style="display: inline;"> In this paper, we investigate receiver design for high frequency (HF) skywave massive multiple-input multiple-output (MIMO) communications. We first establish a modified beam based channel model (BBCM) by performing uniform sampling for directional cosine with deterministic sampling interval, where the beam matrix is constructed using a phase-shifted discrete Fourier transform (DFT) matrix. Based… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.07041v1-abstract-full').style.display = 'inline'; document.getElementById('2501.07041v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.07041v1-abstract-full" style="display: none;"> In this paper, we investigate receiver design for high frequency (HF) skywave massive multiple-input multiple-output (MIMO) communications. We first establish a modified beam based channel model (BBCM) by performing uniform sampling for directional cosine with deterministic sampling interval, where the beam matrix is constructed using a phase-shifted discrete Fourier transform (DFT) matrix. Based on the modified BBCM, we propose a beam structured turbo receiver (BSTR) involving low-dimensional beam domain signal detection for grouped user terminals (UTs), which is proved to be asymptotically optimal in terms of minimizing mean-squared error (MSE). Moreover, we extend it to windowed BSTR by introducing a windowing approach for interference suppression and complexity reduction, and propose a well-designed energy-focusing window. We also present an efficient implementation of the windowed BSTR by exploiting the structure properties of the beam matrix and the beam domain channel sparsity. Simulation results validate the superior performance of the proposed receivers but with remarkably low complexity. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.07041v1-abstract-full').style.display = 'none'; document.getElementById('2501.07041v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.06356">arXiv:2501.06356</a> <span> [<a href="https://arxiv.org/pdf/2501.06356">pdf</a>, <a href="https://arxiv.org/format/2501.06356">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Ultrasound Image Synthesis Using Generative AI for Lung Ultrasound Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chou%2C+Y">Yu-Cheng Chou</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G+Y">Gary Y. Li</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+L">Li Chen</a>, <a href="/search/eess?searchtype=author&query=Zahiri%2C+M">Mohsen Zahiri</a>, <a href="/search/eess?searchtype=author&query=Balaraju%2C+N">Naveen Balaraju</a>, <a href="/search/eess?searchtype=author&query=Patil%2C+S">Shubham Patil</a>, <a href="/search/eess?searchtype=author&query=Hicks%2C+B">Bryson Hicks</a>, <a href="/search/eess?searchtype=author&query=Schnittke%2C+N">Nikolai Schnittke</a>, <a href="/search/eess?searchtype=author&query=Kessler%2C+D+O">David O. Kessler</a>, <a href="/search/eess?searchtype=author&query=Shupp%2C+J">Jeffrey Shupp</a>, <a href="/search/eess?searchtype=author&query=Parker%2C+M">Maria Parker</a>, <a href="/search/eess?searchtype=author&query=Baloescu%2C+C">Cristiana Baloescu</a>, <a href="/search/eess?searchtype=author&query=Moore%2C+C">Christopher Moore</a>, <a href="/search/eess?searchtype=author&query=Gregory%2C+C">Cynthia Gregory</a>, <a href="/search/eess?searchtype=author&query=Gregory%2C+K">Kenton Gregory</a>, <a href="/search/eess?searchtype=author&query=Raju%2C+B">Balasundar Raju</a>, <a href="/search/eess?searchtype=author&query=Kruecker%2C+J">Jochen Kruecker</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+A">Alvin Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.06356v1-abstract-short" style="display: inline;"> Developing reliable healthcare AI models requires training with representative and diverse data. In imbalanced datasets, model performance tends to plateau on the more prevalent classes while remaining low on less common cases. 
To overcome this limitation, we propose DiffUltra, the first generative AI technique capable of synthesizing realistic lung ultrasound (LUS) images with extensive lesion variability. Specifically, we condition the generative AI on the introduced Lesion-Anatomy Bank, which captures the lesion's structural and positional properties from real patient data to guide the image synthesis. We demonstrate that DiffUltra improves consolidation detection by 5.6% in AP compared to models trained solely on real patient data. More importantly, DiffUltra increases data diversity and the prevalence of rare cases, leading to a 25% AP improvement in detecting rare instances such as large lung consolidations, which make up only 10% of the dataset.
Submitted 10 January, 2025; originally announced January 2025.
Comments: Accepted by ISBI 2025

12. arXiv:2501.05859 [pdf, other] eess.AS (Audio and Speech Processing)
Large Model Empowered Streaming Speech Semantic Communications
Authors: Zhenzi Weng, Zhijin Qin, Geoffrey Ye Li
Abstract: In this paper, we introduce a large model-empowered streaming semantic communication system for speech transmission across various languages, named LSSC-ST.
Specifically, we devise an edge-device collaborative semantic communication architecture by offloading the intricate semantic extraction and channel coding modules to edge servers, thereby reducing the computational burden on local devices. To… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.05859v2-abstract-full').style.display = 'inline'; document.getElementById('2501.05859v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.05859v2-abstract-full" style="display: none;"> In this paper, we introduce a large model-empowered streaming semantic communication system for speech transmission across various languages, named LSSC-ST. Specifically, we devise an edge-device collaborative semantic communication architecture by offloading the intricate semantic extraction and channel coding modules to edge servers, thereby reducing the computational burden on local devices. To support multilingual speech transmission, pre-trained large speech models are utilized to learn unified semantic features from speech in different languages, breaking the constraint of a single input language and enhancing the practicality of the LSSC-ST. Moreover, the input speech is sequentially streamed into the developed system as short speech segments, which enables low transmission latency without degrading the quality of the produced speech. A novel dynamic speech segmentation algorithm is proposed to further reduce the transmission latency by adaptively adjusting the duration of speech segments. According to simulation results, the LSSC-ST provides more accurate speech transmission and achieves a streaming manner with lower latency compared to the existing non-streaming semantic communication systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.05859v2-abstract-full').style.display = 'none'; document.getElementById('2501.05859v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
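As a rough illustration of adaptive segment-duration selection (the paper's actual algorithm is not given in the abstract), the sketch below cuts each segment at the quietest frame inside a bounded window, so pauses yield short segments and continuous speech yields longer ones. The energy criterion, window bounds, and frame size are assumptions.

```python
# Illustrative energy-based dynamic segmentation: segment boundaries adapt to
# the signal instead of using a fixed duration.
import numpy as np

def dynamic_segments(wave, sr, min_s=0.3, max_s=1.0, frame_ms=20):
    frame = int(sr * frame_ms / 1000)
    energy = np.array([np.mean(wave[i:i + frame] ** 2)
                       for i in range(0, len(wave) - frame, frame)])
    bounds, start = [], 0
    lo, hi = int(min_s * sr / frame), int(max_s * sr / frame)
    while start + lo < len(energy):
        window = energy[start + lo: start + hi]
        if len(window) == 0:
            break
        # Cut at the quietest frame inside the allowed window: short segments
        # when pauses come early, longer ones otherwise.
        cut = start + lo + int(np.argmin(window))
        bounds.append(cut * frame)  # boundary in samples
        start = cut
    return bounds

sr = 16000
wave = np.random.default_rng(1).standard_normal(sr * 3) * 0.1
print(dynamic_segments(wave, sr)[:5])
```
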
arXiv:2501.04256 [pdf, other]  https://arxiv.org/abs/2501.04256
Subjects: cs.SD (Sound); eess.AS (Audio and Speech Processing)
Title: DrawSpeech: Expressive Speech Synthesis Using Prosodic Sketches as Control Conditions
Authors: Weidong Chen, Shan Yang, Guangzhi Li, Xixin Wu
Abstract: Controlling text-to-speech (TTS) systems to synthesize speech with the prosodic characteristics expected by users has attracted much attention. To achieve controllability, current studies focus on two main directions: (1) using reference speech as a prosody prompt to guide speech synthesis, and (2) using natural language descriptions to control the generation process. However, finding reference speech that exactly contains the prosody that users want to synthesize takes a lot of effort, and description-based guidance can only determine the overall prosody, making fine-grained prosody control over the synthesized speech difficult. In this paper, we propose DrawSpeech, a sketch-conditioned diffusion model capable of generating speech based on any prosody sketches drawn by users. Specifically, the prosody sketches are fed to DrawSpeech to provide a rough indication of the expected prosody trends. DrawSpeech then recovers the detailed pitch and energy contours based on the coarse sketches and synthesizes the desired speech. Experimental results show that DrawSpeech can generate speech with a wide variety of prosody and can precisely control the fine-grained prosody in a user-friendly manner. Our implementation and audio samples are publicly available.
Submitted: 7 January, 2025; originally announced January 2025.
Comments: Accepted by ICASSP 2025

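A coarse user sketch has to be lifted to a frame-level conditioning contour before synthesis; the toy function below uses linear interpolation as a stand-in for whatever representation DrawSpeech actually feeds its diffusion model. The (time, value) sketch format is an assumption.

```python
# Tiny illustration of turning a coarse user-drawn prosody sketch into a
# frame-level conditioning contour (interpolation stands in for the model's
# learned refinement, which the abstract does not spell out).
import numpy as np

def sketch_to_contour(sketch_points, n_frames):
    """sketch_points: a few (time in [0, 1], value) pairs drawn by the user."""
    t, v = zip(*sorted(sketch_points))
    grid = np.linspace(0.0, 1.0, n_frames)
    return np.interp(grid, t, v)

pitch_sketch = [(0.0, 120.0), (0.4, 220.0), (0.7, 180.0), (1.0, 90.0)]  # Hz
contour = sketch_to_contour(pitch_sketch, n_frames=200)
print(contour[:5].round(1))
```
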
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.04256v1-abstract-full').style.display = 'none'; document.getElementById('2501.04256v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ICASSP 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.03643">arXiv:2501.03643</a> <span> [<a href="https://arxiv.org/pdf/2501.03643">pdf</a>, <a href="https://arxiv.org/format/2501.03643">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Effective and Efficient Mixed Precision Quantization of Speech Foundation Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xu%2C+H">Haoning Xu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Z">Zhaoqing Li</a>, <a href="/search/eess?searchtype=author&query=Jin%2C+Z">Zengrui Jin</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+H">Huimeng Wang</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Youjun Chen</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G">Guinan Li</a>, <a href="/search/eess?searchtype=author&query=Geng%2C+M">Mengzhe Geng</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+S">Shujie Hu</a>, <a href="/search/eess?searchtype=author&query=Deng%2C+J">Jiajun Deng</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+X">Xunying Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.03643v2-abstract-short" style="display: inline;"> This paper presents a novel mixed-precision quantization approach for speech foundation models that tightly integrates mixed-precision learning and quantized model parameter estimation into one single model compression stage. Experiments conducted on LibriSpeech dataset with fine-tuned wav2vec2.0-base and HuBERT-large models suggest the resulting mixed-precision quantized models increased the loss… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.03643v2-abstract-full').style.display = 'inline'; document.getElementById('2501.03643v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.03643v2-abstract-full" style="display: none;"> This paper presents a novel mixed-precision quantization approach for speech foundation models that tightly integrates mixed-precision learning and quantized model parameter estimation into one single model compression stage. 
Experiments conducted on LibriSpeech dataset with fine-tuned wav2vec2.0-base and HuBERT-large models suggest the resulting mixed-precision quantized models increased the lossless compression ratio by factors up to 1.7x and 1.9x over the respective uniform-precision and two-stage mixed-precision quantized baselines that perform precision learning and model parameters quantization in separate and disjointed stages, while incurring no statistically word error rate (WER) increase over the 32-bit full-precision models. The system compression time of wav2vec2.0-base and HuBERT-large models is reduced by up to 1.9 and 1.5 times over the two-stage mixed-precision baselines, while both produce lower WERs. The best-performing 3.5-bit mixed-precision quantized HuBERT-large model produces a lossless compression ratio of 8.6x over the 32-bit full-precision system. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.03643v2-abstract-full').style.display = 'none'; document.getElementById('2501.03643v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear at IEEE ICASSP 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.02016">arXiv:2501.02016</a> <span> [<a href="https://arxiv.org/pdf/2501.02016">pdf</a>, <a href="https://arxiv.org/format/2501.02016">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> ST-HCSS: Deep Spatio-Temporal Hypergraph Convolutional Neural Network for Soft Sensing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Tew%2C+H+H">Hwa Hui Tew</a>, <a href="/search/eess?searchtype=author&query=Ding%2C+F">Fan Ding</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G">Gaoxuan Li</a>, <a href="/search/eess?searchtype=author&query=Loo%2C+J+Y">Junn Yong Loo</a>, <a href="/search/eess?searchtype=author&query=Ting%2C+C">Chee-Ming Ting</a>, <a href="/search/eess?searchtype=author&query=Ding%2C+Z+Y">Ze Yang Ding</a>, <a href="/search/eess?searchtype=author&query=Tan%2C+C+P">Chee Pin Tan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.02016v1-abstract-short" style="display: inline;"> Higher-order sensor networks are more accurate in characterizing the nonlinear dynamics of sensory time-series data in modern industrial settings by allowing multi-node connections beyond simple pairwise graph edges. In light of this, we propose a deep spatio-temporal hypergraph convolutional neural network for soft sensing (ST-HCSS). 
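For orientation, this is what generic per-layer mixed-precision quantization looks like; the per-layer bit-widths below are invented for illustration, whereas the paper learns them jointly with the quantized parameters in a single stage.

```python
# Generic per-layer mixed-precision quantization sketch (bit assignments are
# made up; the compression figure ignores scale/metadata overhead).
import numpy as np

rng = np.random.default_rng(2)
layers = {"enc.0": rng.standard_normal(4096), "enc.1": rng.standard_normal(8192)}
bits = {"enc.0": 4, "enc.1": 3}  # hypothetical learned precisions

def quantize(w, b):
    """Uniform symmetric quantization of w to b bits."""
    qmax = 2 ** (b - 1) - 1
    scale = np.max(np.abs(w)) / qmax
    return np.clip(np.round(w / scale), -qmax - 1, qmax) * scale

total = sum(v.size for v in layers.values())
avg_bits = sum(bits[k] * v.size for k, v in layers.items()) / total
err = {k: float(np.mean((quantize(v, bits[k]) - v) ** 2)) for k, v in layers.items()}
print(f"avg bits: {avg_bits:.2f}, compression vs fp32: {32 / avg_bits:.1f}x, mse: {err}")
```
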
arXiv:2501.02016 [pdf, other]  https://arxiv.org/abs/2501.02016
Subjects: cs.LG (Machine Learning); cs.AI (Artificial Intelligence); eess.SP (Signal Processing)
Title: ST-HCSS: Deep Spatio-Temporal Hypergraph Convolutional Neural Network for Soft Sensing
Authors: Hwa Hui Tew, Fan Ding, Gaoxuan Li, Junn Yong Loo, Chee-Ming Ting, Ze Yang Ding, Chee Pin Tan
Abstract: Higher-order sensor networks are more accurate in characterizing the nonlinear dynamics of sensory time-series data in modern industrial settings by allowing multi-node connections beyond simple pairwise graph edges. In light of this, we propose a deep spatio-temporal hypergraph convolutional neural network for soft sensing (ST-HCSS). In particular, our framework constructs and leverages a higher-order graph (hypergraph) to model the complex multi-way interactions between sensor nodes in the absence of prior structural knowledge. To capture the rich spatio-temporal relationships underlying sensor data, ST-HCSS incorporates stacked gated temporal and hypergraph convolution layers to effectively aggregate and update hypergraph information across time and nodes. Our results validate the superiority of ST-HCSS over existing state-of-the-art soft sensors and demonstrate that the learned hypergraph feature representations align well with the sensor data correlations. The code is available at https://github.com/htew0001/ST-HCSS.git
Submitted: 2 January, 2025; originally announced January 2025.
Comments: Accepted at the 2025 IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP 2025)

arXiv:2501.02015 [pdf, other]  https://arxiv.org/abs/2501.02015
Subjects: cs.LG (Machine Learning); cs.AI (Artificial Intelligence); eess.SP (Signal Processing); eess.SY (Systems and Control)
Title: KANS: Knowledge Discovery Graph Attention Network for Soft Sensing in Multivariate Industrial Processes
Authors: Hwa Hui Tew, Gaoxuan Li, Fan Ding, Xuewen Luo, Junn Yong Loo, Chee-Ming Ting, Ze Yang Ding, Chee Pin Tan
Abstract: Soft sensing of hard-to-measure variables is often crucial in industrial processes. Current practices rely heavily on conventional modeling techniques that show success in improving accuracy. However, they overlook the non-linear nature, dynamic characteristics, and non-Euclidean dependencies between complex process variables. To tackle these challenges, we present a framework known as a Knowledge discovery graph Attention Network for effective Soft sensing (KANS). Unlike existing deep learning soft sensor models, KANS can discover the intrinsic correlations and irregular relationships between multivariate industrial processes without a predefined topology. First, an unsupervised graph structure learning method is introduced, incorporating the cosine similarity between different sensor embeddings to capture the correlations between sensors. Next, we present graph attention-based representation learning that processes the multivariate data in parallel to enhance the model in learning complex sensor nodes and edges. To fully explore KANS, a knowledge discovery analysis has also been conducted to demonstrate the interpretability of the model. Experimental results demonstrate that KANS significantly outperforms all baselines and state-of-the-art methods in soft sensing performance. Furthermore, the analysis shows that KANS can find sensors closely related to different process variables without domain knowledge, significantly improving soft sensing accuracy.
Submitted: 2 January, 2025; originally announced January 2025.
Comments: Accepted at the IEEE International Conference on Systems, Man, and Cybernetics (IEEE SMC 2024)

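The cosine-similarity graph construction step reads almost directly as code; the embedding dimension and the top-k sparsification below are assumptions (the abstract only says cosine similarity between sensor embeddings captures the correlations).

```python
# Sketch of unsupervised graph structure learning from sensor embeddings.
import numpy as np

rng = np.random.default_rng(4)
E = rng.standard_normal((8, 16))  # 8 sensors, 16-dim learned embeddings

norm = np.linalg.norm(E, axis=1, keepdims=True)
S = (E / norm) @ (E / norm).T          # pairwise cosine similarity
np.fill_diagonal(S, -np.inf)           # ignore self-similarity

k = 3                                  # keep the k most similar neighbors (assumed)
A = np.zeros_like(S)
rows = np.arange(len(S))[:, None]
A[rows, np.argsort(-S, axis=1)[:, :k]] = 1.0
print(A.sum(axis=1))                   # each sensor links to its top-3 peers
```
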
arXiv:2412.19106 [pdf, other]  https://arxiv.org/abs/2412.19106
Subjects: cs.LG (Machine Learning); eess.SP (Signal Processing); math.NA (Numerical Analysis)
Title: ERGNN: Spectral Graph Neural Network With Explicitly-Optimized Rational Graph Filters
Authors: Guoming Li, Jian Yang, Shangsong Liang
Abstract: Approximation-based spectral graph neural networks, which construct graph filters with function approximation, have shown substantial performance in graph learning tasks. Despite their great success, existing works primarily employ polynomial approximation to construct the filters, whereas another superior option, namely rational approximation, remains underexplored. Although a handful of prior works have attempted to deploy rational approximation, their implementations often involve intensive computational demands or still resort to polynomial approximations, hindering the full potential of rational graph filters. To address these issues, this paper introduces ERGNN, a novel spectral GNN with an explicitly-optimized rational filter. ERGNN adopts a unique two-step framework that sequentially applies the numerator filter and the denominator filter to the input signals, thus streamlining the model paradigm while enabling explicit optimization of both the numerator and denominator of the rational filter. Extensive experiments validate the superiority of ERGNN over state-of-the-art methods, establishing it as a practical solution for deploying rational-based GNNs.
Submitted: 16 January, 2025; v1 submitted 26 December, 2024; originally announced December 2024.
Comments: Accepted at the 2025 IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP 2025)

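The two-step idea can be sketched as two sequential polynomial graph filters with separately optimized coefficient vectors; how ERGNN actually realizes the denominator step (direct application versus a solve) is not specified in the abstract, so this only shows the sequential-application pattern.

```python
# Sequential numerator/denominator filtering sketch on a graph Laplacian.
import numpy as np

def poly_filter(L, coeffs, x):
    """Apply p(L) x = sum_k coeffs[k] * L^k x without forming L^k explicitly."""
    out, v = np.zeros_like(x), x.copy()
    for c in coeffs:
        out += c * v
        v = L @ v
    return out

rng = np.random.default_rng(5)
A = (rng.random((10, 10)) > 0.7).astype(float)
A = np.maximum(A, A.T); np.fill_diagonal(A, 0)
L = np.diag(A.sum(1)) - A          # combinatorial graph Laplacian
x = rng.standard_normal((10, 1))   # one graph signal

p = [1.0, -0.5, 0.1]               # numerator coefficients (would be learned)
q = [1.0, 0.2]                     # denominator coefficients (would be learned)
y = poly_filter(L, q, poly_filter(L, p, x))  # numerator step, then denominator step
print(y.ravel()[:3])
```
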
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in 2025 IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.18832">arXiv:2412.18832</a> <span> [<a href="https://arxiv.org/pdf/2412.18832">pdf</a>, <a href="https://arxiv.org/format/2412.18832">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Structured Speaker-Deficiency Adaptation of Foundation Models for Dysarthric and Elderly Speech Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hu%2C+S">Shujie Hu</a>, <a href="/search/eess?searchtype=author&query=Xie%2C+X">Xurong Xie</a>, <a href="/search/eess?searchtype=author&query=Geng%2C+M">Mengzhe Geng</a>, <a href="/search/eess?searchtype=author&query=Deng%2C+J">Jiajun Deng</a>, <a href="/search/eess?searchtype=author&query=Jin%2C+Z">Zengrui Jin</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+T">Tianzi Wang</a>, <a href="/search/eess?searchtype=author&query=Cui%2C+M">Mingyu Cui</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G">Guinan Li</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Z">Zhaoqing Li</a>, <a href="/search/eess?searchtype=author&query=Meng%2C+H">Helen Meng</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+X">Xunying Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.18832v1-abstract-short" style="display: inline;"> Data-intensive fine-tuning of speech foundation models (SFMs) to scarce and diverse dysarthric and elderly speech leads to data bias and poor generalization to unseen speakers. This paper proposes novel structured speaker-deficiency adaptation approaches for SSL pre-trained SFMs on such data. Speaker and speech deficiency invariant SFMs were constructed in their supervised adaptive fine-tuning sta… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18832v1-abstract-full').style.display = 'inline'; document.getElementById('2412.18832v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.18832v1-abstract-full" style="display: none;"> Data-intensive fine-tuning of speech foundation models (SFMs) to scarce and diverse dysarthric and elderly speech leads to data bias and poor generalization to unseen speakers. This paper proposes novel structured speaker-deficiency adaptation approaches for SSL pre-trained SFMs on such data. Speaker and speech deficiency invariant SFMs were constructed in their supervised adaptive fine-tuning stage to reduce undue bias to training data speakers, and serves as a more neutral and robust starting point for test time unsupervised adaptation. 
Speech variability attributed to speaker identity and speech impairment severity, or aging induced neurocognitive decline, are modelled using separate adapters that can be combined together to model any seen or unseen speaker. Experiments on the UASpeech dysarthric and DementiaBank Pitt elderly speech corpora suggest structured speaker-deficiency adaptation of HuBERT and Wav2vec2-conformer models consistently outperforms baseline SFMs using either: a) no adapters; b) global adapters shared among all speakers; or c) single attribute adapters modelling speaker or deficiency labels alone by statistically significant WER reductions up to 3.01% and 1.50% absolute (10.86% and 6.94% relative) on the two tasks respectively. The lowest published WER of 19.45% (49.34% on very low intelligibility, 33.17% on unseen words) is obtained on the UASpeech test set of 16 dysarthric speakers. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18832v1-abstract-full').style.display = 'none'; document.getElementById('2412.18832v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.18453">arXiv:2412.18453</a> <span> [<a href="https://arxiv.org/pdf/2412.18453">pdf</a>, <a href="https://arxiv.org/format/2412.18453">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Clutter Resilient Occlusion Avoidance for Tightly-Coupled Motion-Assisted Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xie%2C+Z">Zhixuan Xie</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+J">Jianjun Chen</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G">Guoliang Li</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+S">Shuai Wang</a>, <a href="/search/eess?searchtype=author&query=Ye%2C+K">Kejiang Ye</a>, <a href="/search/eess?searchtype=author&query=Eldar%2C+Y+C">Yonina C. Eldar</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+C">Chengzhong Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.18453v1-abstract-short" style="display: inline;"> Occlusion is a key factor leading to detection failures. This paper proposes a motion-assisted detection (MAD) method that actively plans an executable path, for the robot to observe the target at a new viewpoint with potentially reduced occlusion. 
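A minimal sketch of combining two attribute adapters, assuming additive bottleneck adapters on the SFM hidden states; the abstract states only that separate speaker and deficiency adapters are combined to cover seen or unseen speakers, so the bottleneck shape and additive combination are assumptions.

```python
# Two attribute adapters (speaker identity, impairment severity) applied
# additively to hidden states, so any combination of attributes is covered.
import numpy as np

rng = np.random.default_rng(6)

def make_adapter(d=32, r=4):
    """Bottleneck adapter parameters: down-projection and up-projection."""
    return rng.standard_normal((d, r)) * 0.01, rng.standard_normal((r, d)) * 0.01

def adapter(h, params):
    down, up = params
    return np.maximum(h @ down, 0.0) @ up  # down-project, ReLU, up-project

speaker_ad = make_adapter()    # trained for one speaker (or speaker cluster)
severity_ad = make_adapter()   # trained for one impairment-severity band

h = rng.standard_normal((5, 32))                     # SFM hidden states
h_adapted = h + adapter(h, speaker_ad) + adapter(h, severity_ad)
print(h_adapted.shape)
```
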
arXiv:2412.18453 [pdf, other]  https://arxiv.org/abs/2412.18453
Subjects: cs.RO (Robotics); eess.SP (Signal Processing)
Title: Clutter Resilient Occlusion Avoidance for Tightly-Coupled Motion-Assisted Detection
Authors: Zhixuan Xie, Jianjun Chen, Guoliang Li, Shuai Wang, Kejiang Ye, Yonina C. Eldar, Chengzhong Xu
Abstract: Occlusion is a key factor leading to detection failures. This paper proposes a motion-assisted detection (MAD) method that actively plans an executable path for the robot to observe the target from a new viewpoint with potentially reduced occlusion. In contrast to existing MAD approaches that may fail in cluttered environments, the proposed framework is robust in such scenarios and is therefore termed clutter resilient occlusion avoidance (CROA). The crux of CROA is to minimize the occlusion probability under polyhedron-based collision avoidance constraints via the convex-concave procedure and duality-based bilevel optimization. The system implementation supports lidar-based MAD with intertwined execution of learning-based detection and optimization-based planning. Experiments show that CROA outperforms various MAD schemes under a sparse convolutional neural network detector, in terms of point density, occlusion ratio, and detection error, in a multi-lane urban driving scenario.
Submitted: 24 December, 2024; originally announced December 2024.
Comments: 11 figures, accepted by ICASSP'25

</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 figures, accepted by ICASSP'25</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.17062">arXiv:2412.17062</a> <span> [<a href="https://arxiv.org/pdf/2412.17062">pdf</a>, <a href="https://arxiv.org/ps/2412.17062">ps</a>, <a href="https://arxiv.org/format/2412.17062">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Hybrid Beamforming Design for RSMA-enabled Near-Field Integrated Sensing and Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhou%2C+J">Jiasi Zhou</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+C">Cong Zhou</a>, <a href="/search/eess?searchtype=author&query=Tellambura%2C+C">Chintha Tellambura</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G+Y">Geoffrey Ye Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.17062v1-abstract-short" style="display: inline;"> To enable high data rates and sensing resolutions, integrated sensing and communication (ISAC) networks leverage extremely large antenna arrays and high frequencies, extending the Rayleigh distance and making near-field (NF) spherical wave propagation dominant. This unlocks numerous spatial degrees of freedom, raising the challenge of optimizing them for communication and sensing tradeoffs. To thi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.17062v1-abstract-full').style.display = 'inline'; document.getElementById('2412.17062v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.17062v1-abstract-full" style="display: none;"> To enable high data rates and sensing resolutions, integrated sensing and communication (ISAC) networks leverage extremely large antenna arrays and high frequencies, extending the Rayleigh distance and making near-field (NF) spherical wave propagation dominant. This unlocks numerous spatial degrees of freedom, raising the challenge of optimizing them for communication and sensing tradeoffs. To this end, we propose a rate-splitting multiple access (RSMA)-based NF-ISAC transmit scheme utilizing hybrid digital-analog antennas. RSMA enhances interference management, while a variable number of dedicated sensing beams adds beamforming flexibility. The objective is to maximize the minimum communication rate while ensuring multi-target sensing performance by jointly optimizing receive filters, analog and digital beamformers, common rate allocation, and the sensing beam count. To address uncertainty in sensing beam allocation, a rank-zero solution reconstruction method demonstrates that dedicated sensing beams are unnecessary for NF multi-target detection. A penalty dual decomposition (PDD)-based double-loop algorithm is introduced, employing weighted minimum mean-squared error (WMMSE) and quadratic transforms to reformulate communication and sensing rates. 
Simulations reveal that the proposed scheme: 1) Achieves performance comparable to fully digital beamforming with fewer RF chains, (2) Maintains NF multi-target detection without compromising communication rates, and 3) Significantly outperforms space division multiple access (SDMA) and far-field ISAC systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.17062v1-abstract-full').style.display = 'none'; document.getElementById('2412.17062v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages and 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.14369">arXiv:2412.14369</a> <span> [<a href="https://arxiv.org/pdf/2412.14369">pdf</a>, <a href="https://arxiv.org/ps/2412.14369">ps</a>, <a href="https://arxiv.org/format/2412.14369">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Uncertainty Awareness in Wireless Communications, Sensing, and Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+S">Shixiong Wang</a>, <a href="/search/eess?searchtype=author&query=Dai%2C+W">Wei Dai</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G+Y">Geoffrey Ye Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.14369v1-abstract-short" style="display: inline;"> Wireless communications and sensing (WCS) establish the backbone of modern information exchange and environment perception. Typical applications range from mobile networks and the Internet of Things to radar and sensor grids. The incorporation of machine learning further expands WCS's boundaries, unlocking automated and high-quality data analytics, together with advisable and efficient decision-ma… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.14369v1-abstract-full').style.display = 'inline'; document.getElementById('2412.14369v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.14369v1-abstract-full" style="display: none;"> Wireless communications and sensing (WCS) establish the backbone of modern information exchange and environment perception. Typical applications range from mobile networks and the Internet of Things to radar and sensor grids. The incorporation of machine learning further expands WCS's boundaries, unlocking automated and high-quality data analytics, together with advisable and efficient decision-making. 
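The near-field regime invoked here replaces the far-field planar-wave steering vector with one built from exact per-element distances (the spherical wavefront model). The sketch below uses an illustrative linear array and carrier, not the paper's configuration.

```python
# Near-field spherical-wavefront steering vector for a uniform linear array.
import numpy as np

c, fc = 3e8, 30e9                      # speed of light, 30 GHz carrier
lam = c / fc
N = 256                                # extremely large aperture (illustrative)
antennas = (np.arange(N) - (N - 1) / 2) * lam / 2   # element x-coordinates

def nf_steering(user_xy):
    """Phase each element by its exact distance to the user, not by a plane-wave
    approximation; this is what 'spherical wave propagation' means here."""
    d = np.hypot(user_xy[0] - antennas, user_xy[1])
    return np.exp(-1j * 2 * np.pi * d / lam) / np.sqrt(N)

# A 1.28 m aperture at 30 GHz has a Rayleigh distance of roughly 330 m, so a
# user 5 m away is deep in the near field.
a_near = nf_steering(np.array([1.0, 5.0]))
print(np.abs(np.vdot(a_near, a_near)))  # unit-norm steering vector
```
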
arXiv:2412.14369 [pdf, ps, other]  https://arxiv.org/abs/2412.14369
Subjects: eess.SP (Signal Processing)
Title: Uncertainty Awareness in Wireless Communications, Sensing, and Learning
Authors: Shixiong Wang, Wei Dai, Geoffrey Ye Li
Abstract: Wireless communications and sensing (WCS) establish the backbone of modern information exchange and environment perception. Typical applications range from mobile networks and the Internet of Things to radar and sensor grids. The incorporation of machine learning further expands WCS's boundaries, unlocking automated and high-quality data analytics, together with sound and efficient decision-making. Despite these transformative capabilities, wireless systems often face numerous uncertainties in design and operation, such as modeling errors due to incomplete physical knowledge, statistical errors arising from data scarcity, measurement errors caused by sensor imperfections, computational errors owing to resource limitations, and the unpredictability of environmental evolution. Once ignored, these uncertainties can lead to severe outcomes, e.g., performance degradation, system untrustworthiness, inefficient resource utilization, and security vulnerabilities. As such, this article reviews mature and emerging architectural, computational, and operational countermeasures, encompassing uncertainty-aware designs of signals and systems (e.g., diversity, adaptivity, modularity), as well as uncertainty-aware modeling and computational frameworks (e.g., risk-informed optimization, robust signal processing, and trustworthy machine learning). Trade-offs in employing these methods, e.g., robustness vs. optimality, are also highlighted.
Submitted: 18 December, 2024; originally announced December 2024.

arXiv:2412.12760 [pdf, other]  https://arxiv.org/abs/2412.12760
Subjects: cs.SD (Sound); eess.AS (Audio and Speech Processing)
Title: CAMEL: Cross-Attention Enhanced Mixture-of-Experts and Language Bias for Code-Switching Speech Recognition
Authors: He Wang, Xucheng Wan, Naijun Zheng, Kai Liu, Huan Zhou, Guojian Li, Lei Xie
Abstract: Code-switching automatic speech recognition (ASR) aims to accurately transcribe speech that contains two or more languages. To better capture language-specific speech representations and address language confusion in code-switching ASR, the mixture-of-experts (MoE) architecture and an additional language diarization (LD) decoder are commonly employed. However, most research stops at simple operations such as weighted summation or concatenation to fuse language-specific speech representations, leaving significant opportunities to better integrate language bias information. In this paper, we introduce CAMEL, a cross-attention-based MoE and language bias approach for code-switching ASR. Specifically, after each MoE layer, we fuse language-specific speech representations with cross-attention, leveraging its strong contextual modeling abilities. Additionally, we design a source-attention-based mechanism to incorporate the language information from the LD decoder output into text embeddings. Experimental results demonstrate that our approach achieves state-of-the-art performance on the SEAME, ASRU200, and ASRU700+LibriSpeech460 Mandarin-English code-switching ASR datasets.
Submitted: 9 January, 2025; v1 submitted 17 December, 2024; originally announced December 2024.
Comments: Accepted by ICASSP 2025. 5 pages, 2 figures

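Cross-attention fusion of two experts' outputs can be sketched as follows; the single head, the dimensions, and the choice of which language stream provides the queries are assumptions beyond what the abstract states.

```python
# Single-head cross-attention fusing two language experts' frame sequences.
import numpy as np

def softmax(z):
    z = z - z.max(axis=-1, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=-1, keepdims=True)

def cross_attention(q_src, kv_src, d=64):
    """q_src attends over kv_src: queries from one expert, keys/values from
    the other, so each stream can borrow the other's context."""
    rng = np.random.default_rng(7)
    Wq, Wk, Wv = (rng.standard_normal((q_src.shape[-1], d)) * 0.05 for _ in range(3))
    Q, K, V = q_src @ Wq, kv_src @ Wk, kv_src @ Wv
    return softmax(Q @ K.T / np.sqrt(d)) @ V

T, dmodel = 20, 256
rng = np.random.default_rng(8)
h_zh = rng.standard_normal((T, dmodel))   # Mandarin-expert frames
h_en = rng.standard_normal((T, dmodel))   # English-expert frames
fused = cross_attention(h_zh, h_en)       # Mandarin stream attends to English
print(fused.shape)                        # (20, 64)
```
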
arXiv:2412.06666 [pdf]  https://arxiv.org/abs/2412.06666
Subjects: eess.IV (Image and Video Processing); cs.CV (Computer Vision and Pattern Recognition); physics.med-ph (Medical Physics)
Title: Diff5T: Benchmarking Human Brain Diffusion MRI with an Extensive 5.0 Tesla K-Space and Spatial Dataset
Authors: Shanshan Wang, Shoujun Yu, Jian Cheng, Sen Jia, Changjun Tie, Jiayu Zhu, Haohao Peng, Yijing Dong, Jianzhong He, Fan Zhang, Yaowen Xing, Xiuqin Jia, Qi Yang, Qiyuan Tian, Hua Guo, Guobin Li, Hairong Zheng
Abstract: Diffusion magnetic resonance imaging (dMRI) provides critical insights into the microstructural and connectional organization of the human brain. However, the availability of high-field, open-access datasets that include raw k-space data for advanced research remains limited. To address this gap, we introduce Diff5T, the first comprehensive 5.0 Tesla diffusion MRI dataset focusing on the human brain. This dataset includes raw k-space data and reconstructed diffusion images, acquired using a variety of imaging protocols. Diff5T is designed to support the development and benchmarking of innovative methods in artifact correction, image reconstruction, image preprocessing, diffusion modelling, and tractography. The dataset features a wide range of diffusion parameters, including multiple b-values and gradient directions, allowing extensive research applications in studying human brain microstructure and connectivity. With its emphasis on open accessibility and detailed benchmarks, Diff5T serves as a valuable resource for advancing human brain mapping research using diffusion MRI, fostering reproducibility, and enabling collaboration across the neuroscience and medical imaging communities.
Submitted: 9 December, 2024; originally announced December 2024.
Comments: 19 pages, 4 figures, 1 table

arXiv:2412.05647 [pdf, ps, other]  https://arxiv.org/abs/2412.05647
Subjects: cs.IT (Information Theory); eess.SP (Signal Processing)
Title: Deep Reinforcement Learning-Based Resource Allocation for Hybrid Bit and Generative Semantic Communications in Space-Air-Ground Integrated Networks
Authors: Chong Huang, Xuyang Chen, Gaojie Chen, Pei Xiao, Geoffrey Ye Li, Wei Huang
Abstract: In this paper, we introduce a novel framework consisting of hybrid bit-level and generative semantic communications for efficient downlink image transmission within space-air-ground integrated networks (SAGINs). The proposed model comprises multiple low Earth orbit (LEO) satellites, unmanned aerial vehicles (UAVs), and ground users. Because limitations in signal coverage and receiver antennas make direct communication between satellites and ground users infeasible in many scenarios, UAVs serve as relays and forward images from the satellites to the ground users. Our hybrid communication framework effectively combines bit-level transmission with several semantic-level image generation modes, optimizing bandwidth usage to meet stringent satellite link budget constraints and to ensure communication reliability and low latency under low signal-to-noise ratio (SNR) conditions. To reduce the transmission delay while ensuring reconstruction quality at the ground user, we propose a novel metric for measuring delay and reconstruction quality in the proposed system, and employ a deep reinforcement learning (DRL)-based strategy to optimize resource allocation in the proposed network. Simulation results demonstrate the superiority of the proposed framework in terms of communication resource conservation, reduced latency, and maintained image quality, significantly outperforming traditional solutions. The proposed framework can therefore meet real-time image transmission requirements in SAGINs, even under dynamic network conditions and user demand.
Submitted: 7 December, 2024; originally announced December 2024.
Comments: 12 pages

arXiv:2412.04866 [pdf, ps, other]  https://arxiv.org/abs/2412.04866
Subjects: cs.IT (Information Theory); eess.SP (Signal Processing)
Title: Near-field Communications with Extremely Large-Scale Uniform Arc Arrays: Channel Modelling and Performance Analysis
Authors: Guoyu Li, Changsheng You, Guanyu Shang, Shaochuan Wu
Abstract: In this letter, we propose a new conformal array architecture, called the extremely large-scale uniform arc array (XL-UAA), to improve near-field communication performance. Specifically, under the non-uniform spherical wavefront channel model, we establish mathematical modeling and performance analysis for XL-UAAs. It is shown that XL-UAAs have a larger direction-dependent Rayleigh distance and uniform power distance than conventional XL uniform linear arrays (XL-ULAs). Moreover, a closed-form expression for the signal-to-noise ratio (SNR) is obtained, which depends on collective properties of XL-UAAs, such as the distance between the user and the array center, as well as the arc radius. In addition, we show that the asymptotic SNR of XL-UAAs with the number of antennas depends on the projection distance of the user to the middle of the arc array. Finally, numerical results verify that XL-UAAs achieve a higher SNR than XL-ULAs, especially at larger user incident angles.
Submitted: 6 December, 2024; originally announced December 2024.
Comments: 5 pages, 3 figures

<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.04866v1-abstract-full').style.display = 'none'; document.getElementById('2412.04866v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.03121">arXiv:2412.03121</a> <span> [<a href="https://arxiv.org/pdf/2412.03121">pdf</a>, <a href="https://arxiv.org/format/2412.03121">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Splats in Splats: Embedding Invisible 3D Watermark within Gaussian Splatting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Guo%2C+Y">Yijia Guo</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+W">Wenkai Huang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yang Li</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G">Gaolei Li</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+H">Hang Zhang</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+L">Liwen Hu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+J">Jianhua Li</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+T">Tiejun Huang</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+L">Lei Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.03121v1-abstract-short" style="display: inline;"> 3D Gaussian splatting (3DGS) has demonstrated impressive 3D reconstruction performance with explicit scene representations. Given the widespread application of 3DGS in 3D reconstruction and generation tasks, there is an urgent need to protect the copyright of 3DGS assets. However, existing copyright protection techniques for 3DGS overlook the usability of 3D assets, posing challenges for practical… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.03121v1-abstract-full').style.display = 'inline'; document.getElementById('2412.03121v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.03121v1-abstract-full" style="display: none;"> 3D Gaussian splatting (3DGS) has demonstrated impressive 3D reconstruction performance with explicit scene representations. Given the widespread application of 3DGS in 3D reconstruction and generation tasks, there is an urgent need to protect the copyright of 3DGS assets. However, existing copyright protection techniques for 3DGS overlook the usability of 3D assets, posing challenges for practical deployment. 
Here we describe WaterGS, the first 3DGS watermarking framework that embeds 3D content in 3DGS itself without modifying any attributes of the vanilla 3DGS. To achieve this, we take an in-depth look at spherical harmonics (SH) and devise an importance-graded SH coefficient encryption strategy to embed the hidden SH coefficients. Furthermore, we employ a convolutional autoencoder to establish a mapping between the original Gaussian primitives' opacity and the hidden Gaussian primitives' opacity. Extensive experiments indicate that WaterGS significantly outperforms existing 3D steganography techniques, with 5.31% higher scene fidelity and 3X faster rendering speed, while ensuring security, robustness, and user experience. Codes and data will be released at https://water-gs.github.io. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.01425">arXiv:2412.01425</a> <span> [<a href="https://arxiv.org/pdf/2412.01425">pdf</a>, <a href="https://arxiv.org/format/2412.01425">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Reject Threshold Adaptation for Open-Set Model Attribution of Deepfake Audio </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Yan%2C+X">Xinrui Yan</a>, <a href="/search/eess?searchtype=author&query=Yi%2C+J">Jiangyan Yi</a>, <a href="/search/eess?searchtype=author&query=Tao%2C+J">Jianhua Tao</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yujie Chen</a>, <a href="/search/eess?searchtype=author&query=Gu%2C+H">Hao Gu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G">Guanjun Li</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+J">Junzuo Zhou</a>, <a href="/search/eess?searchtype=author&query=Ren%2C+Y">Yong Ren</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+T">Tao Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
<span class="abstract-full has-text-grey-dark mathjax" id="2412.01425v1-abstract-full"> Open environment oriented open set model attribution of deepfake audio is an emerging research topic, aiming to identify the generation models of deepfake audio. Most previous work requires manually setting a rejection threshold for unknown classes to compare with predicted probabilities. However, models often overfit training instances and generate overly confident predictions. Moreover, thresholds that effectively distinguish unknown categories in the current dataset may not be suitable for identifying known and unknown categories in another data distribution. To address the issues, we propose a novel framework for open set model attribution of deepfake audio with rejection threshold adaptation (ReTA). Specifically, the reconstruction error learning module trains by combining the representation of system fingerprints with labels corresponding to either the target class or a randomly chosen other class label. This process generates matching and non-matching reconstructed samples, establishing the reconstruction error distributions for each class and laying the foundation for the reject threshold calculation module. The reject threshold calculation module utilizes Gaussian probability estimation to fit the distributions of matching and non-matching reconstruction errors. It then computes adaptive reject thresholds for all classes through probability minimization criteria. The experimental results demonstrate the effectiveness of ReTA in improving open set model attribution of deepfake audio. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p>
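<p class="is-size-7">The threshold-adaptation step lends itself to a compact sketch. The following Python fragment (synthetic error samples and plain Gaussian fits; a sketch of the idea, not the authors' code) fits matching and non-matching reconstruction-error distributions for one class and picks the reject threshold that minimises the total misclassification probability:</p> <pre><code class="language-python">
# Hedged sketch of ReTA-style threshold adaptation: fit Gaussians to the
# matching / non-matching reconstruction errors of one class, then choose
# the reject threshold minimising the total misclassification probability.
# The error values here are synthetic stand-ins, not real fingerprints.
import math, random

def fit_gaussian(xs):
    mu = sum(xs) / len(xs)
    var = sum((x - mu) ** 2 for x in xs) / len(xs)
    return mu, math.sqrt(var)

def norm_cdf(x, mu, sigma):
    return 0.5 * (1.0 + math.erf((x - mu) / (sigma * math.sqrt(2))))

random.seed(0)
match_err = [random.gauss(0.2, 0.05) for _ in range(500)]     # known class
nonmatch_err = [random.gauss(0.6, 0.10) for _ in range(500)]  # unknown

mu_m, sd_m = fit_gaussian(match_err)
mu_n, sd_n = fit_gaussian(nonmatch_err)

# risk(t) = P(error above t | match) + P(error below t | non-match)
def risk(t):
    return (1.0 - norm_cdf(t, mu_m, sd_m)) + norm_cdf(t, mu_n, sd_n)

grid = [mu_m + k * (mu_n - mu_m) / 200 for k in range(201)]
best_t = min(grid, key=risk)
print(f"adaptive reject threshold for this class: {best_t:.3f}")
</code></pre>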
<p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ISCSLP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.01050">arXiv:2412.01050</a> <span> [<a href="https://arxiv.org/pdf/2412.01050">pdf</a>, <a href="https://arxiv.org/format/2412.01050">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Resilience-oriented Planning and Cost Allocation of Energy Storage Integrated with Soft Open Point Based on Resilience Insurance </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Huang%2C+B">Bingkai Huang</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+Y">Yuxiong Huang</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+Q">Qianwen Hu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G">Gengfeng Li</a>, <a href="/search/eess?searchtype=author&query=Bie%2C+Z">Zhaohong Bie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2412.01050v1-abstract-full"> In recent years, frequent extreme events have put forward higher requirements for improving the resilience of distribution networks (DNs). Introducing energy storage integrated with soft open point (E-SOP) is one of the effective ways to improve resilience. However, the widespread application of E-SOP is limited by its high investment cost. To address this, we propose a cost allocation framework and optimal planning method of E-SOP in resilient DN. Firstly, a cost allocation mechanism for E-SOP based on resilience insurance service is designed; the probability of power users purchasing resilience insurance service is determined based on expected utility theory. Then, a four-layer stochastic distributionally robust optimization (SDRO) model is developed for E-SOP planning and insurance pricing strategy, where the uncertainty in the intensity of contingent extreme events is addressed by a stochastic optimization approach, while the uncertainty in the occurrence of outages and resilience insurance purchases resulting from a specific extreme event is addressed via a distributionally robust optimization approach. Finally, the effectiveness of the proposed model is verified on the modified IEEE 33-bus DN. </span> </p>
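<p class="is-size-7">As a rough illustration of how expected utility theory drives the insurance-purchase decision modelled above (the utility form and every number below are assumptions for the sketch, not values from the paper):</p> <pre><code class="language-python">
# Minimal expected-utility sketch of an insurance-purchase decision: a
# risk-averse user buys resilience insurance when the expected utility of
# paying the premium exceeds that of bearing possible outage losses.
# CARA utility and all parameters are assumed, purely illustrative.
import math

def utility(wealth, a=0.5):          # CARA utility with risk aversion a
    return -math.exp(-a * wealth)

wealth, premium = 10.0, 0.8          # hypothetical values
p_outage, outage_loss = 0.15, 6.0    # extreme-event outage risk (assumed)

eu_uninsured = ((1 - p_outage) * utility(wealth)
                + p_outage * utility(wealth - outage_loss))
eu_insured = utility(wealth - premium)   # insurer covers the outage loss

print("buy insurance:", eu_insured > eu_uninsured)
</code></pre>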
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This work has been submitted to the IEEE PESGM 2025 for possible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.00656">arXiv:2412.00656</a> <span> [<a href="https://arxiv.org/pdf/2412.00656">pdf</a>, <a href="https://arxiv.org/format/2412.00656">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Two-Stage Adaptive Robust Optimization Model for Joint Unit Maintenance and Unit Commitment Considering Source-Load Uncertainty </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Lu%2C+H">Hongrui Lu</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+Y">Yuxiong Huang</a>, <a href="/search/eess?searchtype=author&query=He%2C+T">Tong He</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G">Gengfeng Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2412.00656v1-abstract-full"> Unit maintenance and unit commitment are two critical and interrelated aspects of electric power system operation, both of which face the challenge of coordinating efforts to enhance reliability and economic performance. This challenge becomes increasingly pronounced in the context of increased integration of renewable energy and flexible loads, such as wind power and electric vehicles, into the power system, where high uncertainty is prevalent. To tackle this issue, this paper develops a two-stage adaptive robust optimization model for the joint unit maintenance and unit commitment strategy. The first stage focuses on making joint decisions regarding unit maintenance and unit commitment, while the second stage addresses economic dispatch under the worst-case scenarios of wind power and load demand.
Then, a practical solution methodology is proposed to solve this model efficiently, which combines the inexact column-and-constraint generation algorithm with an outer approximation method. Finally, the economic viability and adaptability of the proposed method are demonstrated on the RTS-79 test system. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 3 figures, conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.00082">arXiv:2412.00082</a> <span> [<a href="https://arxiv.org/pdf/2412.00082">pdf</a>, <a href="https://arxiv.org/format/2412.00082">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Dual Prototyping with Domain and Class Prototypes for Affective Brain-Computer Interface in Unseen Target Conditions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+G">Guangli Li</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+Z">Zhehao Zhou</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+T">Tuo Sun</a>, <a href="/search/eess?searchtype=author&query=Tan%2C+P">Ping Tan</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+L">Li Zhang</a>, <a href="/search/eess?searchtype=author&query=Liang%2C+Z">Zhen Liang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2412.00082v1-abstract-full"> EEG signals have emerged as a powerful tool in affective brain-computer interfaces, playing a crucial role in emotion recognition.
However, current deep transfer learning-based methods for EEG recognition face challenges due to their reliance on both source and target data in model learning, which significantly affects model performance and generalization. To overcome this limitation, we propose a novel framework (PL-DCP) and introduce the concepts of feature disentanglement and prototype inference. The dual prototyping mechanism incorporates both domain and class prototypes: domain prototypes capture individual variations across subjects, while class prototypes represent the ideal class distributions within their respective domains. Importantly, the proposed PL-DCP framework operates exclusively with source data during training, meaning that target data remains completely unseen throughout the entire process. To address label noise, we employ a pairwise learning strategy that encodes proximity relationships between sample pairs, effectively reducing the influence of mislabeled data. Experimental validation on the SEED and SEED-IV datasets demonstrates that PL-DCP, despite not utilizing target data during training, achieves performance comparable to deep transfer learning methods that require both source and target data. This highlights the potential of PL-DCP as an effective and robust approach for EEG-based emotion recognition. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.19385">arXiv:2411.19385</a> <span> [<a href="https://arxiv.org/pdf/2411.19385">pdf</a>, <a href="https://arxiv.org/format/2411.19385">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Zero-Forget Preservation of Semantic Communication Alignment in Distributed AI Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hu%2C+J">Jingzhi Hu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G+Y">Geoffrey Ye Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
<span class="abstract-full has-text-grey-dark mathjax" id="2411.19385v1-abstract-full"> Future communication networks are expected to connect massive distributed artificial intelligence (AI). Exploiting aligned prior knowledge of AI pairs, it is promising to convert high-dimensional data transmission into highly-compressed semantic communications (SC). However, to accommodate the local data distribution and user preferences, AIs generally adapt to different domains, which fundamentally distorts the SC alignment. In this paper, we propose a zero-forget domain adaptation (ZFDA) framework to preserve SC alignment. To prevent the DA from changing substantial neural parameters of AI, we design sparse additive modifications (SAM) to the parameters, which can be efficiently stored and switched off to restore the SC alignment. To optimize the SAM, we decouple it into tractable continuous variables and a binary mask, and then handle the binary mask by a score-based optimization. Experimental evaluations on a SC system for image transmissions validate that the proposed framework perfectly preserves the SC alignment with almost no loss of DA performance, even improved in some cases, at a cost of less than 1% additional memory. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.18153">arXiv:2411.18153</a> <span> [<a href="https://arxiv.org/pdf/2411.18153">pdf</a>, <a href="https://arxiv.org/ps/2411.18153">ps</a>, <a href="https://arxiv.org/format/2411.18153">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Learning Rate-Compatible Linear Block Codes: An Auto-Encoder Based Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Cheng%2C+Y">Yukun Cheng</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wei Chen</a>, <a href="/search/eess?searchtype=author&query=Hou%2C+T">Tianwei Hou</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G+Y">Geoffrey Ye Li</a>, <a href="/search/eess?searchtype=author&query=Ai%2C+B">Bo Ai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
<span class="abstract-full has-text-grey-dark mathjax" id="2411.18153v1-abstract-full"> Artificial intelligence (AI) provides an alternative way to design channel coding with affordable complexity. However, most existing studies can only learn codes for a given size and rate, typically defined by a fixed network architecture and a set of parameters. The support of multiple code rates is essential for conserving bandwidth under varying channel conditions, while it is costly to store multiple AI models or parameter sets. In this article, we propose auto-encoder (AE) based rate-compatible linear block codes (RC-LBCs). The coding process associated with AI or non-AI decoders and multiple puncturing patterns is optimized in a data-driven manner. The superior performance of the proposed AI-based RC-LBC is demonstrated through our numerical experiments. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.17990">arXiv:2411.17990</a> <span> [<a href="https://arxiv.org/pdf/2411.17990">pdf</a>, <a href="https://arxiv.org/format/2411.17990">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Beam Switching Based Beam Design for High-Speed Train mmWave Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Huang%2C+J">Jingjia Huang</a>, <a href="/search/eess?searchtype=author&query=Qi%2C+C">Chenhao Qi</a>, <a href="/search/eess?searchtype=author&query=Dobre%2C+O+A">Octavia A. Dobre</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G+Y">Geoffrey Ye Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
<span class="abstract-full has-text-grey-dark mathjax" id="2411.17990v1-abstract-full"> For high-speed train (HST) millimeter wave (mmWave) communications, the use of narrow beams with small beam coverage needs frequent beam switching, while wider beams with small beam gain lead to weaker mmWave signal strength. In this paper, we consider beam switching based beam design, which is formulated as an optimization problem aiming to minimize the number of switched beams within a predetermined railway range, subject to the constraint that the receiving signal-to-noise ratio (RSNR) at the HST is no lower than a predetermined threshold. To solve this problem, we propose two sequential beam design schemes, both including two alternately-performed stages. In the first stage, given an updated beam coverage according to the railway range, we transform the problem into a feasibility problem and further convert it into a min-max optimization problem by relaxing the RSNR constraints into a penalty of the objective function. In the second stage, we evaluate the feasibility of the beamformer obtained from solving the min-max problem and determine the beam coverage accordingly. Simulation results show that compared to the first scheme, the second scheme can achieve a 96.20% reduction in computational complexity at the cost of only 0.0657% performance degradation. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p>
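<p class="is-size-7">The trade-off this optimization formalises, fewer switches with wide beams versus higher gain with narrow ones, can be seen in a toy calculation (geometry, gain model and RSNR threshold below are all assumed for illustration):</p> <pre><code class="language-python">
# Toy beam-count versus gain trade-off for covering a railway range with
# contiguous angular sectors. Everything here is an assumption made for
# the sketch, not the paper's system model or algorithm.
import math

d0 = 50.0                 # BS-to-track perpendicular distance, metres
x0, x1 = -500.0, 500.0    # railway range to cover, metres
snr_at_1deg = 20.0        # assumed worst-case RSNR (dB) with a 1-degree beam

def beams_needed(width_deg):
    # angular span of the railway range seen from the base station
    a0 = math.degrees(math.atan2(x0, d0))
    a1 = math.degrees(math.atan2(x1, d0))
    return math.ceil((a1 - a0) / width_deg)

for width in (1.0, 2.0, 5.0, 10.0):
    # gain scales roughly inversely with beamwidth: -10*log10(width) dB
    rsnr = snr_at_1deg - 10 * math.log10(width)
    feasible = rsnr >= 10.0            # assumed RSNR threshold of 10 dB
    print(f"width={width:4.1f} deg  beams={beams_needed(width):3d}  "
          f"worst RSNR={rsnr:5.1f} dB  feasible={feasible}")
</code></pre>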
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.15255">arXiv:2411.15255</a> <span> [<a href="https://arxiv.org/pdf/2411.15255">pdf</a>, <a href="https://arxiv.org/format/2411.15255">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> OSMamba: Omnidirectional Spectral Mamba with Dual-Domain Prior Generator for Exposure Correction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+G">Gehui Li</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+B">Bin Chen</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+C">Chen Zhao</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+L">Lei Zhang</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+J">Jian Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2411.15255v1-abstract-full"> Exposure correction is a fundamental problem in computer vision and image processing. Recently, frequency domain-based methods have achieved impressive improvement, yet they still struggle with complex real-world scenarios under extreme exposure conditions. This is due to the local convolutional receptive fields failing to model long-range dependencies in the spectrum, and the non-generative learning paradigm being inadequate for retrieving lost details from severely degraded regions. In this paper, we propose Omnidirectional Spectral Mamba (OSMamba), a novel exposure correction network that incorporates the advantages of state space models and generative diffusion models to address these limitations. Specifically, OSMamba introduces an omnidirectional spectral scanning mechanism that adapts Mamba to the frequency domain to capture comprehensive long-range dependencies in both the amplitude and phase spectra of deep image features, hence enhancing illumination correction and structure recovery. Furthermore, we develop a dual-domain prior generator that learns from well-exposed images to generate a degradation-free diffusion prior containing correct information about severely under- and over-exposed regions for better detail restoration.
Extensive experiments on multiple-exposure and mixed-exposure datasets demonstrate that the proposed OSMamba achieves state-of-the-art performance both quantitatively and qualitatively. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.15238">arXiv:2411.15238</a> <span> [<a href="https://arxiv.org/pdf/2411.15238">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Analysis of the impact of heterogeneous platoon for mixed traffic flow: control strategy, fuel consumption and emissions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wu%2C+Y">Yunxia Wu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+L">Le Li</a>, <a href="/search/eess?searchtype=author&query=Yao%2C+Z">Zhihong Yao</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yi Wang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G">Gen Li</a>, <a href="/search/eess?searchtype=author&query=Jiang%2C+Y">Yangsheng Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2411.15238v1-abstract-full"> Compared with traditional vehicle longitudinal spacing control strategies, the combination spacing strategy can integrate the advantages of different spacing control strategies. However, the impact mechanism of different combination spacing control strategies on mixed traffic flow has not been analyzed yet. Therefore, this paper proposes various combination spacing control strategies for connected automated vehicles (CAVs). First, a mixed traffic flow model was developed to analyze the characteristics of CAV platoons. On this basis, a probability model of vehicle distribution was derived, and its effectiveness was verified through simulation. Then, multiple spacing combination strategies are proposed based on four spacing control strategies.
Finally, numerical experiments were conducted to calculate the average fuel consumption and pollutant emissions of mixed traffic flow under different spacing control strategies, and the impact of platoon spacing control strategies on traffic flow fuel consumption and pollutant emissions was further analyzed. Results show that: (1) the differences in average fuel consumption and pollutant emissions of traffic flow are relatively small under different platoon spacing control strategies at low traffic density (i.e., 15 veh/km); (2) at medium to high traffic densities (i.e., 55-95 veh/km), when the penetration rate of CAVs exceeds 80%, VTG1-CS, VTG2-CS, and CTG-CS strategies can effectively ensure traffic flow stability and safety, and significantly reduce fuel consumption and pollutant emissions. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13383">arXiv:2411.13383</a> <span> [<a href="https://arxiv.org/pdf/2411.13383">pdf</a>, <a href="https://arxiv.org/format/2411.13383">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Adversarial Diffusion Compression for Real-World Image Super-Resolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+B">Bin Chen</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G">Gehui Li</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+R">Rongyuan Wu</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+X">Xindong Zhang</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+J">Jie Chen</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+J">Jian Zhang</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+L">Lei Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
<span class="abstract-full has-text-grey-dark mathjax" id="2411.13383v1-abstract-full"> Real-world image super-resolution (Real-ISR) aims to reconstruct high-resolution images from low-resolution inputs degraded by complex, unknown processes. While many Stable Diffusion (SD)-based Real-ISR methods have achieved remarkable success, their slow, multi-step inference hinders practical deployment. Recent SD-based one-step networks like OSEDiff and S3Diff alleviate this issue but still incur high computational costs due to their reliance on large pretrained SD models. This paper proposes a novel Real-ISR method, AdcSR, by distilling the one-step diffusion network OSEDiff into a streamlined diffusion-GAN model under our Adversarial Diffusion Compression (ADC) framework. We meticulously examine the modules of OSEDiff, categorizing them into two types: (1) Removable (VAE encoder, prompt extractor, text encoder, etc.) and (2) Prunable (denoising UNet and VAE decoder). Since direct removal and pruning can degrade the model's generation capability, we pretrain our pruned VAE decoder to restore its ability to decode images and employ adversarial distillation to compensate for performance loss. This ADC-based diffusion-GAN hybrid design effectively reduces complexity by 73% in inference time, 78% in computation, and 74% in parameters, while preserving the model's generation capability. Experiments demonstrate that our proposed AdcSR achieves competitive recovery quality on both synthetic and real-world datasets, offering up to 9.3$\times$ speedup over previous one-step diffusion-based methods. Code and models will be made available. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p>
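<p class="is-size-7">A schematic of one adversarial-distillation step in this spirit (tiny stand-in networks and simplified losses; a sketch of the general technique, not the released AdcSR code) might look like:</p> <pre><code class="language-python">
# Schematic adversarial-distillation step: a compact student matches a
# frozen teacher's output (L1 distillation) while a discriminator pushes
# the student's outputs toward realism. Architectures, loss weights and
# data are stand-ins, assumed for the sketch only.
import torch, torch.nn as nn

teacher = nn.Sequential(nn.Conv2d(3, 64, 3, padding=1), nn.ReLU(),
                        nn.Conv2d(64, 3, 3, padding=1)).eval()
student = nn.Sequential(nn.Conv2d(3, 16, 3, padding=1), nn.ReLU(),
                        nn.Conv2d(16, 3, 3, padding=1))   # "pruned" model
disc = nn.Sequential(nn.Conv2d(3, 16, 4, stride=2, padding=1),
                     nn.LeakyReLU(0.2),
                     nn.Conv2d(16, 1, 4, stride=2, padding=1))

opt_g = torch.optim.Adam(student.parameters(), lr=1e-4)
opt_d = torch.optim.Adam(disc.parameters(), lr=1e-4)
bce = nn.BCEWithLogitsLoss()

lr_imgs = torch.rand(4, 3, 32, 32)            # stand-in low-res batch
with torch.no_grad():
    target = teacher(lr_imgs)                 # teacher's restored output

# discriminator step: real = teacher output, fake = student output
fake = student(lr_imgs)
d_real = disc(target)
d_fake = disc(fake.detach())
d_loss = bce(d_real, torch.ones_like(d_real)) + bce(d_fake, torch.zeros_like(d_fake))
opt_d.zero_grad(); d_loss.backward(); opt_d.step()

# generator step: distillation (L1 to teacher) plus adversarial term
g_adv = disc(fake)
g_loss = nn.functional.l1_loss(fake, target) + 0.1 * bce(g_adv, torch.ones_like(g_adv))
opt_g.zero_grad(); g_loss.backward(); opt_g.step()
print(f"d_loss={d_loss.item():.3f}  g_loss={g_loss.item():.3f}")
</code></pre>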
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10831">arXiv:2411.10831</a> <span> [<a href="https://arxiv.org/pdf/2411.10831">pdf</a>, <a href="https://arxiv.org/format/2411.10831">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Neighboring Slice Noise2Noise: Self-Supervised Medical Image Denoising from Single Noisy Image Volume </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhou%2C+L">Langrui Zhou</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+Z">Ziteng Zhou</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+X">Xinyu Huang</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+X">Xiangyu Zhang</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+H">Huiru Wang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G">Guang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2411.10831v2-abstract-full"> In the last few years, with the rapid development of deep learning technologies, supervised methods based on convolutional neural networks have greatly enhanced the performance of medical image denoising. However, these methods require large quantities of noisy-clean image pairs for training, which greatly limits their practicality. Although some researchers have attempted to train denoising networks using only single noisy images, existing self-supervised methods, including blind-spot-based and data-splitting-based methods, heavily rely on the assumption that noise is pixel-wise independent. However, this assumption often does not hold in real-world medical images. Therefore, in the field of medical imaging, there remains a lack of simple and practical denoising methods that can achieve high-quality denoising performance using only single noisy images. In this paper, we propose a novel self-supervised medical image denoising method, Neighboring Slice Noise2Noise (NS-N2N). The proposed method utilizes neighboring slices within a single noisy image volume to construct weighted training data, and then trains the denoising network using a self-supervised scheme with regional consistency loss and inter-slice continuity loss.
NS-N2N only requires a single noisy image volume obtained from one medical imaging procedure to achieve high-quality denoising of the image volume itself. Extensive experiments demonstrate that the proposed method outperforms state-of-the-art self-supervised denoising methods in both denoising performance and processing efficiency. Furthermore, since NS-N2N operates solely in the image domain, it is free from device-specific issues such as reconstruction geometry, making it easier to apply in various clinical practices. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p>
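<p class="is-size-7">The pairing idea can be sketched in a few lines (a simplified reading of the construction; the neighbour weights and loss terms here are placeholders, not the authors' implementation):</p> <pre><code class="language-python">
# Simplified construction of NS-N2N-style training pairs: neighbouring
# slices of one noisy volume act as mutually-noisy views of nearly the
# same anatomy, giving Noise2Noise-style supervision without clean targets.
# The volume and the 0.5/0.5 weights are stand-ins for the sketch.
import numpy as np

volume = np.random.rand(64, 128, 128).astype(np.float32)  # stand-in volume

pairs = []
for k in range(1, volume.shape[0] - 1):
    inp = volume[k]
    # weighted average of the two neighbours serves as the noisy target
    tgt = 0.5 * volume[k - 1] + 0.5 * volume[k + 1]
    pairs.append((inp, tgt))

print(f"{len(pairs)} slice pairs from a single noisy volume")
# a denoiser f would then be trained with something like
#   loss = ||f(inp) - tgt||^2 + regional-consistency + inter-slice terms
</code></pre>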
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06564">arXiv:2411.06564</a> <span> [<a href="https://arxiv.org/pdf/2411.06564">pdf</a>, <a href="https://arxiv.org/format/2411.06564">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Robust Beamforming with Application in High-Resolution Sensing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+S">Shixiong Wang</a>, <a href="/search/eess?searchtype=author&query=Dai%2C+W">Wei Dai</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G+Y">Geoffrey Ye Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2411.06564v1-abstract-full"> As a fundamental technique in array signal processing, beamforming plays a crucial role in amplifying signals of interest while mitigating interference and noise. When uncertainties exist in the signal model or the data size of snapshots is limited, the performance of beamformers significantly degrades. In this article, we comprehensively study the conceptual system, theoretical analysis, and algorithmic design for robust beamforming. Particularly, four technical approaches for robust beamforming are discussed, including locally robust beamforming, globally robust beamforming, regularized beamforming, and Bayesian-nonparametric beamforming. In addition, we investigate the equivalence among the methods and suggest a unified robust beamforming framework. As an application example, we show that the resolution of robust beamformers for direction-of-arrival (DoA) estimation can be greatly refined by incorporating the characteristics of subspace methods. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p>
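<p class="is-size-7">A classical member of the regularized family surveyed here is the diagonally-loaded MVDR beamformer, sketched below on synthetic snapshots (array size, loading level and scenario are illustrative choices, not the article's algorithms):</p> <pre><code class="language-python">
# Diagonally-loaded MVDR beamformer: diagonal loading keeps the sample
# covariance well conditioned when only a few snapshots are available.
# All scenario parameters are assumptions made for this sketch.
import numpy as np

rng = np.random.default_rng(0)
N, snapshots = 8, 20                       # small array, limited data
steer = lambda theta: np.exp(1j * np.pi * np.arange(N) * np.sin(theta))

a_sig = steer(np.deg2rad(10.0))            # look direction
a_int = steer(np.deg2rad(-40.0))           # strong interferer
X = (a_sig[:, None] * rng.standard_normal(snapshots)
     + 3.0 * a_int[:, None] * rng.standard_normal(snapshots)
     + 0.1 * (rng.standard_normal((N, snapshots))
              + 1j * rng.standard_normal((N, snapshots))))

R_hat = X @ X.conj().T / snapshots         # sample covariance
gamma = 0.1 * np.trace(R_hat).real / N     # loading level (one common choice)
R_dl = R_hat + gamma * np.eye(N)

w = np.linalg.solve(R_dl, a_sig)
w /= (a_sig.conj() @ w)                    # distortionless constraint
print("gain toward interferer (dB):",
      20 * np.log10(abs(w.conj() @ a_int) / abs(w.conj() @ a_sig)))
</code></pre>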
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.12419">arXiv:2410.12419</a> <span> [<a href="https://arxiv.org/pdf/2410.12419">pdf</a>, <a href="https://arxiv.org/format/2410.12419">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Mind the Context: Attention-Guided Weak-to-Strong Consistency for Enhanced Semi-Supervised Medical Image Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Cheng%2C+Y">Yuxuan Cheng</a>, <a href="/search/eess?searchtype=author&query=Shao%2C+C">Chenxi Shao</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+J">Jie Ma</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G">Guoliang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2410.12419v2-abstract-full"> Medical image segmentation is a pivotal step in diagnostic and therapeutic processes, relying on high-quality annotated data that is often challenging and costly to obtain. Semi-supervised learning offers a promising approach to enhance model performance by leveraging unlabeled data. Although weak-to-strong consistency is a prevalent method in semi-supervised image segmentation, there is a scarcity of research on perturbation strategies specifically tailored for semi-supervised medical image segmentation tasks. To address this challenge, this paper introduces a simple yet efficient semi-supervised learning framework named Attention-Guided weak-to-strong Consistency Match (AIGCMatch). The AIGCMatch framework incorporates attention-guided perturbation strategies at both the image and feature levels to achieve weak-to-strong consistency regularization. This method not only preserves the structural information of medical images but also enhances the model's ability to process complex semantic information. Extensive experiments conducted on the ACDC and ISIC-2017 datasets have validated the effectiveness of AIGCMatch. Our method achieved a 90.4% Dice score in the 7-case scenario on the ACDC dataset, surpassing the state-of-the-art methods and demonstrating its potential and efficacy in clinical settings. Additionally, on the ISIC-2017 dataset, we significantly outperformed our baseline, indicating the robustness and generalizability of AIGCMatch across different medical image segmentation tasks. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.11736">arXiv:2410.11736</a> <span> [<a href="https://arxiv.org/pdf/2410.11736">pdf</a>, <a href="https://arxiv.org/format/2410.11736">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Near-Field Communications for Extremely Large-Scale MIMO: A Beamspace Perspective </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+K">Kangjian Chen</a>, <a href="/search/eess?searchtype=author&query=Qi%2C+C">Chenhao Qi</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+J">Jingjia Huang</a>, <a href="/search/eess?searchtype=author&query=Dobre%2C+O+A">Octavia A. Dobre</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G+Y">Geoffrey Ye Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
<span class="abstract-full has-text-grey-dark mathjax" id="2410.11736v1-abstract-full"> Extremely large-scale multiple-input multiple-output (XL-MIMO) is regarded as one of the key techniques to enhance the performance of future wireless communications. Different from regular MIMO, XL-MIMO shifts part of the communication region from the far field to the near field, where the spherical-wave channel model cannot be accurately approximated by the commonly-adopted planar-wave channel model. As a result, the well-explored far-field beamspace is unsuitable for near-field communications, thereby requiring the exploration of a specialized near-field beamspace. In this article, we investigate near-field communications for XL-MIMO from the perspective of beamspace. Given the spherical wavefront characteristics of the near-field channels, we first map the antenna space to the near-field beamspace with the fractional Fourier transform. Then, we divide the near-field beamspace into three parts, including high mainlobe, low mainlobe, and sidelobe, and provide a comprehensive analysis of these components. Based on the analysis, we demonstrate the advantages of the near-field beamspace over the existing methods. Finally, we point out several applications of the near-field beamspace and highlight some potential directions for future study in the near-field beamspace. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p>
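<p class="is-size-7">Why the far-field beamspace breaks down in the near field can be checked numerically: a spherical-wave steering vector spreads its energy across many far-field DFT beams. The sketch below (all parameters illustrative; it uses the standard DFT beamspace, not the article's fractional-Fourier construction) shows the effect:</p> <pre><code class="language-python">
# Energy spread of a near-field (spherical-wave) steering vector over the
# far-field DFT beamspace of a ULA. Parameters are illustrative only.
import numpy as np

N, wavelength = 256, 0.01
d = wavelength / 2
pos = (np.arange(N) - (N - 1) / 2) * d       # ULA element coordinates

def near_field(r, theta):
    # exact distances from a user at (r, theta) to each element
    dist = np.sqrt(r**2 + pos**2 - 2 * r * pos * np.sin(theta))
    return np.exp(-2j * np.pi * dist / wavelength) / np.sqrt(N)

F = np.fft.fft(np.eye(N)) / np.sqrt(N)       # far-field DFT beamspace

for r in (2.0, 10.0, 200.0):                 # deep near field ... far field
    h = near_field(r, np.deg2rad(20.0))
    g = np.abs(F @ h) ** 2
    top4 = np.sort(g)[-4:].sum() / g.sum()   # energy in the 4 strongest beams
    print(f"r={r:6.1f} m  energy captured by 4 DFT beams: {top4:.2f}")
</code></pre>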
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.03962">arXiv:2410.03962</a> <span> [<a href="https://arxiv.org/pdf/2410.03962">pdf</a>, <a href="https://arxiv.org/format/2410.03962">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> SpecSAR-Former: A Lightweight Transformer-based Network for Global LULC Mapping Using Integrated Sentinel-1 and Sentinel-2 </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Yu%2C+H">Hao Yu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G">Gen Li</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+H">Haoyu Liu</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+S">Songyan Zhu</a>, <a href="/search/eess?searchtype=author&query=Dong%2C+W">Wenquan Dong</a>, <a href="/search/eess?searchtype=author&query=Li%2C+C">Changjian Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2410.03962v1-abstract-full"> Recent approaches in remote sensing have increasingly focused on multimodal data, driven by the growing availability of diverse earth observation datasets. Integrating complementary information from different modalities has shown substantial potential in enhancing semantic understanding. However, existing global multimodal datasets often lack the inclusion of Synthetic Aperture Radar (SAR) data, which excels at capturing texture and structural details. SAR, as a complementary perspective to other modalities, facilitates the utilization of spatial information for global land use and land cover (LULC) mapping. To address this gap, we introduce the Dynamic World+ dataset, expanding the current authoritative multispectral dataset, Dynamic World, with aligned SAR data. Additionally, to facilitate the combination of multispectral and SAR data, we propose a lightweight transformer architecture termed SpecSAR-Former. It incorporates two innovative modules, the Dual Modal Enhancement Module (DMEM) and the Mutual Modal Aggregation Module (MMAM), designed to exploit cross-information between the two modalities in a split-fusion manner. These modules enhance the model's ability to integrate spectral and spatial information, thereby improving the overall performance of global LULC semantic segmentation.
Furthermore, we adopt an imbalanced parameter allocation strategy that assigns parameters to the two modalities according to their importance and information density. Extensive experiments demonstrate that our network outperforms existing transformer- and CNN-based models, achieving a mean Intersection over Union (mIoU) of 59.58%, an Overall Accuracy (OA) of 79.48%, and an F1 Score of 71.68% with only 26.70M parameters. The code will be available at https://github.com/Reagan1311/LULC_segmentation. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p>
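<p>The abstract does not spell out the internals of DMEM and MMAM, but the split-fusion idea it names can be sketched with generic bidirectional cross-attention between the two modality token sets. Everything below (token counts, dimensions, the residual form) is an illustrative assumption rather than the paper's architecture.</p>
<pre><code>
import numpy as np

def softmax(z, axis=-1):
    z = z - z.max(axis=axis, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=axis, keepdims=True)

def cross_attention(queries, context):
    """Each query token aggregates information from the other modality."""
    d = queries.shape[-1]
    return softmax(queries @ context.T / np.sqrt(d)) @ context

rng = np.random.default_rng(0)
spec = rng.standard_normal((196, 32))  # multispectral patch tokens (Sentinel-2)
sar = rng.standard_normal((196, 32))   # SAR patch tokens (Sentinel-1)

# Split-fusion: each branch is first enhanced by the other modality,
# then the two branches are aggregated.
spec_enh = spec + cross_attention(spec, sar)
sar_enh = sar + cross_attention(sar, spec)
fused = np.concatenate([spec_enh, sar_enh], axis=-1)
print(fused.shape)  # (196, 64)
</code></pre>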
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.19276">arXiv:2409.19276</a> <span> [<a href="https://arxiv.org/pdf/2409.19276">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Deep Learning-based Automated Diagnosis of Obstructive Sleep Apnea and Sleep Stage Classification in Children Using Millimeter-wave Radar and Pulse Oximeter </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+W">Wei Wang</a>, <a href="/search/eess?searchtype=author&query=Song%2C+R">Ruobing Song</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Y">Yunxiao Wu</a>, <a href="/search/eess?searchtype=author&query=Zheng%2C+L">Li Zheng</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+W">Wenyu Zhang</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Z">Zhaoxi Chen</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G">Gang Li</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+Z">Zhifei Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2409.19276v2-abstract-full"> Study Objectives: To evaluate the agreement between a millimeter-wave radar-based device and polysomnography (PSG) in the diagnosis of obstructive sleep apnea (OSA) and the classification of sleep stages in children. Methods: 281 children aged 1 to 18 years who underwent sleep monitoring between September and November 2023 at the Sleep Center of Beijing Children's Hospital, Capital Medical University, were recruited into the study. All enrolled children underwent sleep monitoring by PSG and the millimeter-wave radar-based device, QSA600, simultaneously. QSA600 recordings were automatically analyzed by a deep learning model, while the PSG data were manually scored. Results: The Obstructive Apnea-Hypopnea Index (OAHI) obtained from QSA600 and PSG demonstrates a high level of agreement, with an intraclass correlation coefficient of 0.945 (95% CI: 0.93 to 0.96). Bland-Altman analysis indicates that the mean difference in OAHI between QSA600 and PSG is -0.10 events/h (95% CI: -11.15 to 10.96). The deep learning model, evaluated through cross-validation, showed good sensitivity (81.8%, 84.3% and 89.7%) and specificity (90.5%, 95.3% and 97.1%) for diagnosing children with OAHI>1, OAHI>5 and OAHI>10; the areas under the receiver operating characteristic curve are 0.923, 0.955 and 0.988, respectively. For sleep stage classification, the model achieved Kappa coefficients of 0.854, 0.781, and 0.734, with corresponding overall accuracies of 95.0%, 84.8%, and 79.7% for Wake-sleep, Wake-REM-Light-Deep, and Wake-REM-N1-N2-N3 classification, respectively. Conclusions: QSA600 demonstrates high agreement with PSG in diagnosing OSA and performing sleep staging in children. The device is portable, low-load, and suitable for follow-up and long-term pediatric sleep assessment. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p>
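<p>The agreement statistics quoted above (intraclass correlation coefficient and Bland-Altman limits) can be reproduced on paired OAHI readings in a few lines. The sketch below uses synthetic data and a two-way random-effects, single-measure ICC(2,1); the paper does not state which ICC variant was used, so this is one plausible choice.</p>
<pre><code>
import numpy as np

def icc_2_1(x, y):
    """Two-way random-effects, single-measure ICC(2,1) for paired ratings."""
    data = np.stack([x, y], axis=1)            # n subjects x 2 raters
    n, k = data.shape
    mean_subj = data.mean(axis=1)
    mean_rater = data.mean(axis=0)
    grand = data.mean()
    ms_rows = k * np.sum((mean_subj - grand) ** 2) / (n - 1)
    ms_cols = n * np.sum((mean_rater - grand) ** 2) / (k - 1)
    resid = data - mean_subj[:, None] - mean_rater[None, :] + grand
    ms_err = np.sum(resid ** 2) / ((n - 1) * (k - 1))
    return (ms_rows - ms_err) / (
        ms_rows + (k - 1) * ms_err + k * (ms_cols - ms_err) / n)

def bland_altman(x, y):
    diff = x - y
    bias = diff.mean()
    half_width = 1.96 * diff.std(ddof=1)       # 95% limits of agreement
    return bias, (bias - half_width, bias + half_width)

# Synthetic paired OAHI readings for 281 children (illustrative only).
rng = np.random.default_rng(0)
psg = rng.gamma(2.0, 3.0, 281)
device = psg + rng.normal(0.0, 1.5, 281)
print(icc_2_1(device, psg))
print(bland_altman(device, psg))
</code></pre>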
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.19217">arXiv:2409.19217</a> <span> [<a href="https://arxiv.org/pdf/2409.19217">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Detection of Sleep Apnea-Hypopnea Events Using Millimeter-wave Radar and Pulse Oximeter </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+W">Wei Wang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+C">Chenyang Li</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Z">Zhaoxi Chen</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+W">Wenyu Zhang</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Z">Zetao Wang</a>, <a href="/search/eess?searchtype=author&query=Guo%2C+X">Xi Guo</a>, <a href="/search/eess?searchtype=author&query=Guan%2C+J">Jian Guan</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G">Gang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2409.19217v1-abstract-full"> Obstructive Sleep Apnea-Hypopnea Syndrome (OSAHS) is a sleep-related breathing disorder associated with significant morbidity and mortality worldwide. The gold standard for OSAHS diagnosis, polysomnography (PSG), faces challenges in widespread adoption due to its high cost and complexity. Recently, radar has shown potential for detecting sleep apnea-hypopnea events (SAE), with the advantages of low cost and non-contact monitoring. However, existing studies, especially those using deep learning, employ a segment-based classification approach to SAE detection, which makes it difficult to estimate the number of events. Additionally, radar-based SAE detection is susceptible to interference from body movements and the environment. Oxygen saturation (SpO2) offers valuable information about OSAHS but has its own limitations and cannot be used alone for diagnosis. In this study, we propose ROSA, a method that uses a millimeter-wave radar and a pulse oximeter to detect SAE: it fuses information from both sensors and directly predicts the temporal localization of SAE. Experimental results demonstrate a high degree of consistency (ICC=0.9864) between the AHI from ROSA and from PSG. This study presents an effective, low-load approach to the diagnosis of OSAHS. </span> </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p>
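<p>Because ROSA predicts the temporal localization of events rather than per-segment labels, an AHI estimate follows directly from the predicted event list. Below is a minimal sketch of turning a per-second event-probability track into discrete events and an AHI; the threshold, minimum duration, merge gap, and synthetic probabilities are illustrative assumptions, not values from the paper.</p>
<pre><code>
import numpy as np

def extract_events(prob, fs=1.0, thresh=0.5, min_dur=10.0, merge_gap=5.0):
    """Turn a per-sample event-probability track into (start, end) events in seconds."""
    active = np.concatenate([[0], (prob >= thresh).astype(int), [0]])
    starts = np.flatnonzero(np.diff(active) == 1)
    ends = np.flatnonzero(np.diff(active) == -1)
    merged = []
    for s, e in zip(starts, ends):
        if merged and s - merged[-1][1] <= merge_gap * fs:
            merged[-1][1] = e          # bridge short gaps between detections
        else:
            merged.append([s, e])
    return [(s / fs, e / fs) for s, e in merged if e - s >= min_dur * fs]

# Synthetic one-night probability track at 1 Hz with injected events.
rng = np.random.default_rng(0)
prob = np.full(8 * 3600, 0.1)
for start in rng.integers(0, prob.size - 60, 40):
    prob[start:start + 20] = 0.9       # 20-second synthetic events

events = extract_events(prob)
ahi = len(events) / (prob.size / 3600)  # events per hour of recording
print(len(events), round(ahi, 1))
</code></pre>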
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.11909">arXiv:2409.11909</a> <span> [<a href="https://arxiv.org/pdf/2409.11909">pdf</a>, <a href="https://arxiv.org/format/2409.11909">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Mixture of Experts Fusion for Fake Audio Detection Using Frozen wav2vec 2.0 </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+Z">Zhiyong Wang</a>, <a href="/search/eess?searchtype=author&query=Fu%2C+R">Ruibo Fu</a>, <a href="/search/eess?searchtype=author&query=Wen%2C+Z">Zhengqi Wen</a>, <a href="/search/eess?searchtype=author&query=Tao%2C+J">Jianhua Tao</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+X">Xiaopeng Wang</a>, <a href="/search/eess?searchtype=author&query=Xie%2C+Y">Yuankun Xie</a>, <a href="/search/eess?searchtype=author&query=Qi%2C+X">Xin Qi</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+S">Shuchen Shi</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+Y">Yi Lu</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Y">Yukun Liu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+C">Chenxing Li</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+X">Xuefei Liu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G">Guanjun Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2409.11909v1-abstract-full"> Speech synthesis technology has posed a serious threat to speaker verification systems. Currently, the most effective fake audio detection methods rely on pretrained models, and integrating features from various layers of the pretrained model further enhances detection performance. However, most previously proposed fusion methods require fine-tuning the pretrained models, resulting in excessively long training times and hindering model iteration when new speech synthesis technologies emerge. To address this issue, this paper proposes a feature fusion method based on a Mixture of Experts, which extracts and integrates features relevant to fake audio detection from the layer features, guided by a gating network conditioned on the last-layer feature, while keeping the pretrained model frozen. Experiments conducted on the ASVspoof2019 and ASVspoof2021 datasets demonstrate that the proposed method achieves performance competitive with methods that require fine-tuning. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">submitted to ICASSP2025</span> </p>
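<p>A minimal sketch of the fusion mechanism this abstract describes: layer features from a frozen backbone are combined by a gating network conditioned on the last layer's feature. The shapes mimic wav2vec 2.0 (12 layers, 768-dim), but the single-linear-map gating and time-average pooling are assumptions for illustration.</p>
<pre><code>
import numpy as np

def softmax(z):
    z = z - z.max()
    e = np.exp(z)
    return e / e.sum()

rng = np.random.default_rng(0)
n_layers, t_frames, d = 12, 200, 768   # wav2vec 2.0-like feature shapes
layers = rng.standard_normal((n_layers, t_frames, d))  # frozen layer outputs

# Gating network (here a single linear map, an assumption) conditioned on the
# time-averaged last-layer feature produces one mixing weight per layer.
w_gate = 0.02 * rng.standard_normal((d, n_layers))
gate = softmax(layers[-1].mean(axis=0) @ w_gate)

# Expert fusion: weighted sum over layers. The backbone stays frozen, so only
# w_gate and a downstream classifier would receive gradients.
fused = np.tensordot(gate, layers, axes=(0, 0))  # (t_frames, d)
print(gate.round(3))
print(fused.shape)
</code></pre>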
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.11835">arXiv:2409.11835</a> <span> [<a href="https://arxiv.org/pdf/2409.11835">pdf</a>, <a href="https://arxiv.org/format/2409.11835">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> DPI-TTS: Directional Patch Interaction for Fast-Converging and Style Temporal Modeling in Text-to-Speech </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Qi%2C+X">Xin Qi</a>, <a href="/search/eess?searchtype=author&query=Fu%2C+R">Ruibo Fu</a>, <a href="/search/eess?searchtype=author&query=Wen%2C+Z">Zhengqi Wen</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+T">Tao Wang</a>, <a href="/search/eess?searchtype=author&query=Qiang%2C+C">Chunyu Qiang</a>, <a href="/search/eess?searchtype=author&query=Tao%2C+J">Jianhua Tao</a>, <a href="/search/eess?searchtype=author&query=Li%2C+C">Chenxing Li</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+Y">Yi Lu</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+S">Shuchen Shi</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Z">Zhiyong Wang</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+X">Xiaopeng Wang</a>, <a href="/search/eess?searchtype=author&query=Xie%2C+Y">Yuankun Xie</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Y">Yukun Liu</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+X">Xuefei Liu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G">Guanjun Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2409.11835v1-abstract-full"> In recent years, speech diffusion models have advanced rapidly. Alongside the widely used U-Net architecture, transformer-based models such as the Diffusion Transformer (DiT) have also gained attention. However, current DiT speech models treat Mel spectrograms as generic images, which overlooks the specific acoustic properties of speech. To address these limitations, we propose Directional Patch Interaction for Text-to-Speech (DPI-TTS), a method that builds on DiT and achieves fast training without compromising accuracy. Notably, DPI-TTS employs a low-to-high-frequency, frame-by-frame progressive inference approach that aligns more closely with the acoustic properties of speech, enhancing the naturalness of the generated audio. Additionally, we introduce a fine-grained style temporal modeling method that further improves speaker style similarity. Experimental results demonstrate that our method nearly doubles the training speed and significantly outperforms baseline models. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to ICASSP2025</span> </p>
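<p>One plausible reading of the low-to-high-frequency, frame-by-frame progressive inference is an attention mask over time-frequency patches, in which each patch sees only earlier-or-current frames and lower-or-equal frequency bands. The grid size and the exact masking rule below are assumptions for illustration, not the paper's definition.</p>
<pre><code>
import numpy as np

# Time-frequency patch grid: t_p frames by f_p frequency bands (illustrative).
t_p, f_p = 6, 4
patches = [(t, f) for t in range(t_p) for f in range(f_p)]
n = len(patches)

# Directional mask: a query patch may attend to key patches at an
# earlier-or-same frame AND a lower-or-same frequency band, so decoding can
# proceed frame by frame and from low to high frequency.
mask = np.zeros((n, n), dtype=bool)
for i, (t_q, f_q) in enumerate(patches):
    for j, (t_k, f_k) in enumerate(patches):
        mask[i, j] = t_k <= t_q and f_k <= f_q

print(int(mask.sum()), "of", n * n, "attention links allowed")
</code></pre>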
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.09381">arXiv:2409.09381</a> <span> [<a href="https://arxiv.org/pdf/2409.09381">pdf</a>, <a href="https://arxiv.org/format/2409.09381">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Text Prompt is Not Enough: Sound Event Enhanced Prompt Adapter for Target Style Audio Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xiong%2C+C">Chenxu Xiong</a>, <a href="/search/eess?searchtype=author&query=Fu%2C+R">Ruibo Fu</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+S">Shuchen Shi</a>, <a href="/search/eess?searchtype=author&query=Wen%2C+Z">Zhengqi Wen</a>, <a href="/search/eess?searchtype=author&query=Tao%2C+J">Jianhua Tao</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+T">Tao Wang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+C">Chenxing Li</a>, <a href="/search/eess?searchtype=author&query=Qiang%2C+C">Chunyu Qiang</a>, <a href="/search/eess?searchtype=author&query=Xie%2C+Y">Yuankun Xie</a>, <a href="/search/eess?searchtype=author&query=Qi%2C+X">Xin Qi</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G">Guanjun Li</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+Z">Zizheng Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2409.09381v1-abstract-full"> Current mainstream audio generation methods primarily rely on simple text prompts, often failing to capture the nuanced details necessary for multi-style audio generation. To address this limitation, we propose the Sound Event Enhanced Prompt Adapter. Unlike traditional static global style transfer, this method extracts a style embedding through cross-attention between the text and a reference audio clip, enabling adaptive style control. Adaptive layer normalization is then applied to enhance the model's capacity to express multiple styles.
Additionally, the Sound Event Reference Style Transfer Dataset (SERST) is introduced for the proposed target-style audio generation task, enabling dual-prompt audio generation from both text and audio references. Experimental results demonstrate the robustness of the model, which achieves a state-of-the-art Fréchet Distance of 26.94 and a KL Divergence of 1.82, surpassing Tango, AudioLDM, and AudioGen. Furthermore, the generated audio shows high similarity to its corresponding audio reference. The demo, code, and dataset are publicly available. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 2 figures, submitted to ICASSP 2025</span> </p>
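<p>A compact sketch of the two mechanisms named in this abstract: cross-attention between text tokens and reference-audio frames to extract a style embedding, followed by adaptive layer normalization whose scale and shift are predicted from that embedding. The dimensions, mean pooling, and linear projections are illustrative assumptions.</p>
<pre><code>
import numpy as np

def softmax(z, axis=-1):
    z = z - z.max(axis=axis, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=axis, keepdims=True)

rng = np.random.default_rng(0)
d = 64
text = rng.standard_normal((20, d))    # text-prompt tokens
ref = rng.standard_normal((150, d))    # reference-audio frames

# Style embedding: text tokens attend over the reference audio, then pool.
attn = softmax(text @ ref.T / np.sqrt(d))
style = (attn @ ref).mean(axis=0)

# Adaptive layer norm: scale and shift predicted from the style embedding.
w_gamma = 0.02 * rng.standard_normal((d, d))
w_beta = 0.02 * rng.standard_normal((d, d))

def ada_ln(h, s):
    h_norm = (h - h.mean(-1, keepdims=True)) / (h.std(-1, keepdims=True) + 1e-5)
    return (1.0 + s @ w_gamma) * h_norm + s @ w_beta

hidden = rng.standard_normal((100, d))  # generator hidden states
print(ada_ln(hidden, style).shape)      # (100, 64)
</code></pre>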
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.06847">arXiv:2409.06847</a> <span> [<a href="https://arxiv.org/pdf/2409.06847">pdf</a>, <a href="https://arxiv.org/ps/2409.06847">ps</a>, <a href="https://arxiv.org/format/2409.06847">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Downlink Beamforming for Cell-Free ISAC: A Fast Complex Oblique Manifold Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zargari%2C+S">Shayan Zargari</a>, <a href="/search/eess?searchtype=author&query=Galappaththige%2C+D">Diluka Galappaththige</a>, <a href="/search/eess?searchtype=author&query=Tellambura%2C+C">Chintha Tellambura</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G+Y">Geoffrey Ye Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2409.06847v1-abstract-full"> Cell-free integrated sensing and communication (CF-ISAC) systems are emerging as a promising technique for future communications. Such a system comprises several multiple-antenna access points (APs) serving multiple single-antenna communication users and sensing targets. However, efficient beamforming designs that achieve high precision and robust performance in densely populated networks are lacking. This paper proposes a new beamforming algorithm that exploits the inherent Riemannian manifold structure of the problem. The aim is to maximize the communication sum rate while satisfying sensing beampattern gain and per-AP transmit power constraints. To address this constrained optimization problem, a highly efficient augmented Lagrangian model-based iterative manifold optimization for CF-ISAC (ALMCI) algorithm is developed. This algorithm exploits the geometry of the problem and uses a complex oblique manifold. Conventional convex-concave procedure (CCPA) and multidimensional complex quadratic transform (MCQT)-CSA algorithms are also developed as comparative benchmarks. The ALMCI algorithm significantly outperforms both: for example, with 16 APs having 12 antennas and 30 dBm transmit power each, ALMCI yields 22.7% and 6.7% sum-rate gains over the CCPA and MCQT-CSA algorithms, respectively. In addition to improved communication capacity, the ALMCI algorithm achieves superior beamforming gains with reduced complexity. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 13 figures, submitted to an IEEE Transactions Journal</span> </p>
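<p>The complex oblique manifold underlying the ALMCI algorithm is the set of complex matrices with unit-norm columns. The augmented-Lagrangian machinery is not reproduced here; the sketch below shows only the core Riemannian ingredients (tangent-space projection and retraction) on a toy column-alignment objective chosen for illustration.</p>
<pre><code>
import numpy as np

def proj_tangent(x_mat, g_mat):
    """Project a Euclidean gradient onto the tangent space of the complex
    oblique manifold (complex matrices with unit-norm columns)."""
    inner = np.real(np.sum(np.conj(x_mat) * g_mat, axis=0))
    return g_mat - x_mat * inner

def retract(x_mat):
    """Retraction: renormalize each column back onto the manifold."""
    return x_mat / np.linalg.norm(x_mat, axis=0, keepdims=True)

def manifold_descent(x_mat, egrad, step=0.1, iters=200):
    for _ in range(iters):
        x_mat = retract(x_mat - step * proj_tangent(x_mat, egrad(x_mat)))
    return x_mat

# Toy objective: align unit-norm beamformers with a target matrix a_mat,
# i.e. minimize ||X - A||^2 subject to unit-norm columns.
rng = np.random.default_rng(0)
shape = (12, 4)   # 12 antennas, 4 beams (illustrative)
a_mat = rng.standard_normal(shape) + 1j * rng.standard_normal(shape)
x0 = retract(rng.standard_normal(shape) + 1j * rng.standard_normal(shape))
x_opt = manifold_descent(x0, lambda x: 2.0 * (x - a_mat))

print(np.linalg.norm(np.linalg.norm(x_opt, axis=0) - 1.0))  # ~0: on the manifold
</code></pre>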
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.04302">arXiv:2409.04302</a> <span> [<a href="https://arxiv.org/pdf/2409.04302">pdf</a>, <a href="https://arxiv.org/format/2409.04302">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Fast Adaptation for Deep Learning-based Wireless Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+O">Ouya Wang</a>, <a href="/search/eess?searchtype=author&query=He%2C+H">Hengtao He</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+S">Shenglong Zhou</a>, <a href="/search/eess?searchtype=author&query=Ding%2C+Z">Zhi Ding</a>, <a href="/search/eess?searchtype=author&query=Jin%2C+S">Shi Jin</a>, <a href="/search/eess?searchtype=author&query=Letaief%2C+K+B">Khaled B. Letaief</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G+Y">Geoffrey Ye Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2409.04302v1-abstract-full"> The integration of artificial intelligence (AI) is recognized as one of the six usage scenarios of next-generation wireless communications. However, several critical challenges hinder the widespread application of deep learning (DL) techniques in wireless communications; in particular, existing DL-based systems struggle to adapt to rapidly changing wireless environments. In this paper, we discuss fast adaptation for DL-based wireless communications using few-shot learning (FSL) techniques. We first identify the differences between fast adaptation in wireless communications and traditional AI tasks by highlighting two distinct FSL design requirements for wireless communications. To establish a broad perspective, we present a comprehensive review of existing FSL techniques in wireless communications that satisfy these two design requirements, emphasizing the importance of domain knowledge in achieving fast adaptation. We focus on multiuser multiple-input multiple-output (MU-MIMO) precoding as an example to demonstrate the advantages of FSL for fast adaptation in wireless communications. Finally, we highlight several open research issues for the broad future deployment of fast-adaptive DL in wireless communication applications. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p>
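<p>The paper above surveys FSL techniques rather than prescribing a single one. As one concrete instance of gradient-based fast adaptation, the sketch below uses Reptile-style meta-learning on a toy family of linear channels; the task model, sample counts, and step sizes are all illustrative assumptions.</p>
<pre><code>
import numpy as np

def sgd_steps(theta, x, y, lr=0.05, steps=5):
    """A few inner steps of least-squares SGD on one task's support set."""
    for _ in range(steps):
        theta = theta - lr * 2.0 * x.T @ (x @ theta - y) / len(y)
    return theta

rng = np.random.default_rng(0)
d = 8
w_base = rng.standard_normal(d)   # shared structure across environments
theta = np.zeros(d)               # meta-initialization to be learned

# Reptile: after adapting to each sampled task, nudge the initialization
# toward the adapted weights.
for _ in range(500):
    w_task = w_base + 0.1 * rng.standard_normal(d)  # one "environment"
    x = rng.standard_normal((16, d))                # few-shot support set
    theta = theta + 0.1 * (sgd_steps(theta, x, x @ w_task) - theta)

# Deployment: adapt to an unseen environment from only 16 samples.
w_new = w_base + 0.1 * rng.standard_normal(d)
x_new = rng.standard_normal((16, d))
adapted = sgd_steps(theta, x_new, x_new @ w_new)
print(round(float(np.mean((adapted - w_new) ** 2)), 4))
</code></pre>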
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.03265">arXiv:2409.03265</a> <span> [<a href="https://arxiv.org/pdf/2409.03265">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Enhancing digital core image resolution using optimal upscaling algorithm: with application to paired SEM images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=You%2C+S">Shaohua You</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+S">Shuqi Sun</a>, <a href="/search/eess?searchtype=author&query=Yan%2C+Z">Zhengting Yan</a>, <a href="/search/eess?searchtype=author&query=Liao%2C+Q">Qinzhuo Liao</a>, <a href="/search/eess?searchtype=author&query=Tang%2C+H">Huiying Tang</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+L">Lianhe Sun</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G">Gensheng Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2409.03265v1-abstract-full"> The porous media community extensively utilizes digital rock images for core analysis. High-resolution digital rock images of sufficient quality are essential but often challenging to acquire. Super-resolution (SR) approaches enhance the resolution of digital rock images and provide improved visualization of fine features and structures, aiding the analysis and interpretation of rock properties such as pore connectivity and mineral distribution. However, there is currently a shortage of real paired microscopic images for super-resolution training. In this study, we used two types of Scanning Electron Microscopes (SEM) to image shale samples from five regions at 1X, 2X, 4X, 8X and 16X magnifications. We used these real paired scans as a reference to select the optimal method of low-resolution image generation and validated it using the Enhanced Deep Super Resolution (EDSR) and Very Deep Super Resolution (VDSR) methods. Our experiments show that the bilinear algorithm is more suitable than the commonly used bicubic method for establishing low-resolution datasets in SR approaches, which is partially attributable to the imaging mechanism of SEM. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p>
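<p>The degradation-model comparison described above is straightforward to replicate: generate candidate low-resolution images with bilinear and bicubic resampling and score them against the low-magnification scan. In the sketch below, a synthetic stand-in replaces the real low-magnification SEM image (the hypothetical <code>lr_scanned</code>), so only the comparison mechanics are shown.</p>
<pre><code>
import numpy as np
from PIL import Image

rng = np.random.default_rng(0)
hr = Image.fromarray(rng.integers(0, 256, (512, 512), dtype=np.uint8))

# Candidate degradation models for building the low-resolution training set.
lr_bilinear = hr.resize((128, 128), resample=Image.BILINEAR)
lr_bicubic = hr.resize((128, 128), resample=Image.BICUBIC)

def psnr(a, b):
    mse = np.mean((np.asarray(a, float) - np.asarray(b, float)) ** 2)
    return 10.0 * np.log10(255.0 ** 2 / mse)

# Stand-in for the real low-magnification SEM scan (hypothetical): a noisy
# copy of one candidate, so the pipeline runs end to end.
lr_scanned = Image.fromarray(np.clip(
    np.asarray(lr_bilinear, float) + rng.normal(0, 2, (128, 128)),
    0, 255).astype(np.uint8))

print(round(psnr(lr_scanned, lr_bilinear), 2), round(psnr(lr_scanned, lr_bicubic), 2))
</code></pre>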
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.17252">arXiv:2408.17252</a> <span> [<a href="https://arxiv.org/pdf/2408.17252">pdf</a>, <a href="https://arxiv.org/format/2408.17252">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> A Homogeneous Graph Neural Network for Precoding and Power Allocation in Scalable Wireless Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Sun%2C+M">Mingjun Sun</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+S">Shaochuan Wu</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+H">Haojie Wang</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Y">Yuanwei Liu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G">Guoyu Li</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+T">Tong Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2408.17252v2-abstract-full"> Deep learning is widely used in wireless communications, but fixed neural network sizes limit adaptability in environments where the numbers of users and antennas vary. To overcome this, this paper introduces a generalization strategy for precoding and power allocation in scalable wireless networks. We first abstract the wireless network into a homogeneous graph: we bypass the heterogeneous features between transmitter (TX) and user entities and construct a virtual homogeneous graph that serves the optimization objectives, so that all nodes in the virtual graph share the same neural network. This "TX entity" is known as a base station (BS) in cellular networks and as an access point (AP) in cell-free networks. Subsequently, we design a universal graph neural network, termed the information carrying graph neural network (ICGNN), to capture and integrate information from this graph while maintaining permutation invariance.
Lastly, using the ICGNN as the core algorithm, we tailor the neural network's input and output to the requirements of specific problems and validate its performance in two scenarios: 1) in cellular networks, we develop a matrix-inverse-free multi-user multiple-input multiple-output (MU-MIMO) precoding scheme using the conjugate gradient (CG) method, adaptable to varying numbers of users and antennas; 2) in cell-free networks, facing dynamic variations in the number of users served by each AP, the number of APs serving each user, and the number of antennas per AP, we propose a universal power allocation scheme. Simulations demonstrate that the proposed approach not only significantly reduces computational complexity but also achieves, and in some cases exceeds, the spectral efficiency (SE) of conventional algorithms. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages</span> </p>
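<p>A minimal sketch of matrix-inverse-free precoding with the conjugate gradient method, here applied to regularized zero-forcing, where a CG solve on the Gram matrix replaces the explicit inverse. The CG routine is standard; the RZF objective, sizes, and regularizer are illustrative assumptions rather than the ICGNN pipeline itself.</p>
<pre><code>
import numpy as np

def conjugate_gradient(a_mat, b, iters=50, tol=1e-10):
    """Solve a_mat @ x = b for Hermitian positive-definite a_mat, no inverse."""
    x = np.zeros_like(b)
    r = b - a_mat @ x
    p = r.copy()
    rs = np.vdot(r, r)
    for _ in range(iters):
        ap = a_mat @ p
        alpha = rs / np.vdot(p, ap)
        x = x + alpha * p
        r = r - alpha * ap
        rs_new = np.vdot(r, r)
        if abs(rs_new) < tol:
            break
        p = r + (rs_new / rs) * p
        rs = rs_new
    return x

# Regularized zero-forcing: w = H^H (H H^H + lam*I)^{-1} s, with the inverse
# replaced by a CG solve (user/antenna counts and lam are illustrative).
rng = np.random.default_rng(0)
k_users, n_ant, lam = 8, 32, 0.1
h = rng.standard_normal((k_users, n_ant)) + 1j * rng.standard_normal((k_users, n_ant))
s = rng.standard_normal(k_users) + 1j * rng.standard_normal(k_users)

gram = h @ h.conj().T + lam * np.eye(k_users)
w = h.conj().T @ conjugate_gradient(gram, s)
print(np.linalg.norm(h @ w - s))  # small residual, up to the regularization
</code></pre>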
</li> </ol> </div> </main> </body> </html>