Search | arXiv e-print repository

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8"/>
  <meta name="viewport" content="width=device-width, initial-scale=1"/>

  <!-- Site icons, web manifest and browser theme colours. -->
  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png">
  <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png">
  <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png">
  <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest">
  <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b">
  <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico">
  <meta name="msapplication-TileColor" content="#b31b1b">
  <meta name="msapplication-config" content="images/icons/browserconfig.xml">
  <meta name="theme-color" content="#b31b1b">

  <title>Search | arXiv e-print repository</title>
  <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script>
  <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" />

  <!-- MathJax 2.x configuration block; it must appear before MathJax.js is loaded. -->
  <script type="text/x-mathjax-config">
    MathJax.Hub.Config({
      messageStyle: "none",
      extensions: ["tex2jax.js"],
      jax: ["input/TeX", "output/HTML-CSS"],
      tex2jax: {
        // Inline math: $...$ and \(...\). The backslashes are doubled because
        // these are JavaScript string literals (same form as displayMath below).
        inlineMath: [ ['$','$'], ["\\(","\\)"] ],
        displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
        processEscapes: true,
        // Ignore every element by default and opt in via a "mathjax…" class.
        ignoreClass: '.*',
        processClass: 'mathjax.*'
      },
      TeX: {
        extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"],
        noErrors: {
          inlineDelimiters: ["$","$"],
          multiLine: false,
          style: { "font-size": "normal", "border": "" }
        }
      },
      "HTML-CSS": { availableFonts: ["TeX"] }
    });
  </script>
  <!-- Explicit https: instead of a protocol-relative URL. -->
  <script src="https://static.arxiv.org/MathJax-2.7.3/MathJax.js"></script>
  <script
src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script>
  <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" />
  <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" />
  <!-- Third-party script: pinned with Subresource Integrity. -->
  <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script>
  <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script>
  <style>
    /* NOTE(review): "radio" is not an HTML element name, so this type selector
       likely matches nothing — confirm the intended target. */
    radio#cf-customfield_11400 { display: none; }
  </style>
</head>
<body>
<header><a href="#main-container" class="is-sr-only">Skip to main content</a>
  <!-- Attribution bar: Cornell logo and sponsor acknowledgement. -->
  <div class="attribution level is-marginless" role="banner">
    <div class="level-left">
      <!-- No aria-label on the image: it would override the descriptive alt text. -->
      <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" /></a>
    </div>
    <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div>
  </div>
  <!-- Identity bar: arXiv logo and the header mini-search form. -->
  <div class="identity level is-marginless">
    <div class="level-left">
      <div class="level-item">
        <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" alt="arxiv logo" width="85" style="width:85px;"/> </a>
      </div>
    </div>
    <div class="search-block level-right">
      <form class="level-item mini-search" method="GET" action="https://arxiv.org/search">
        <div class="field has-addons">
          <div class="control">
            <input class="input is-small" type="text" name="query" placeholder="Search..."
aria-label="Search term or terms" />
            <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p>
          </div>
          <div class="control">
            <div class="select is-small">
              <select name="searchtype" aria-label="Field to search">
                <option value="all" selected="selected">All fields</option>
                <option value="title">Title</option>
                <option value="author">Author</option>
                <option value="abstract">Abstract</option>
                <option value="comments">Comments</option>
                <option value="journal_ref">Journal reference</option>
                <option value="acm_class">ACM classification</option>
                <option value="msc_class">MSC classification</option>
                <option value="report_num">Report number</option>
                <option value="paper_id">arXiv identifier</option>
                <option value="doi">DOI</option>
                <option value="orcid">ORCID</option>
                <option value="author_id">arXiv author ID</option>
                <option value="help">Help pages</option>
                <option value="full_text">Full text</option>
              </select>
            </div>
          </div>
          <!-- Submitted with the query as source=header. -->
          <input type="hidden" name="source" value="header">
          <button class="button is-small is-cul-darker">Search</button>
        </div>
      </form>
    </div>
  </div>
  <div class="container">
    <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div>
  </div>
</header>
<!-- Primary content; target of the "Skip to main content" link. -->
<main class="container" id="main-container">
  <div class="level is-marginless">
    <div class="level-left">
      <h1 class="title is-clearfix"> Showing 1–50 of 146 results for author: <span class="mathjax">Xiao, X</span> </h1>
    </div>
    <div class="level-right is-hidden-mobile">
      <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span>
    </div>
  </div>
  <div class="content">
    <!-- role="search" (not the invalid "aria-role") exposes this form as a search landmark. -->
    <form method="GET" action="/search/eess" role="search"> Searching in archive <strong>eess</strong>.
<a href="/search/?searchtype=author&query=Xiao%2C+X">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Xiao, X"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Xiao%2C+X&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level 
breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Xiao, X"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div>
<div class="control">
  <label for="order">Sort results by</label>
  <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span>
</div>
<div class="control">
  <button class="button is-small is-link">Go</button>
</div>
</div>
</form>
</div>
</div>
<!-- Result pagination. Only the page being displayed carries aria-current="page";
     the other links are labelled as navigation targets ("Goto page N"). -->
<nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination">
  <a href="" class="pagination-previous is-invisible">Previous </a>
  <a href="/search/?searchtype=author&query=Xiao%2C+X&start=50" class="pagination-next" >Next </a>
  <ul class="pagination-list">
    <li> <a href="/search/?searchtype=author&query=Xiao%2C+X&start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li>
    <li> <a href="/search/?searchtype=author&query=Xiao%2C+X&start=50" class="pagination-link " aria-label="Goto page 2">2 </a> </li>
    <li> <a href="/search/?searchtype=author&query=Xiao%2C+X&start=100" class="pagination-link " aria-label="Goto page 3">3 </a> </li>
  </ul>
</nav>
<!-- Search results: one li.arxiv-result per paper. -->
<ol class="breathe-horizontal" start="1">
  <li class="arxiv-result">
    <div class="is-marginless">
      <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04649">arXiv:2502.04649</a> <span> [<a href="https://arxiv.org/pdf/2502.04649">pdf</a>, <a href="https://arxiv.org/format/2502.04649">other</a>] </span> </p>
      <div class="tags is-inline-block">
        <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span>
        <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span>
        <span class="tag is-small is-grey tooltip is-tooltip-top"
data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> End-to-End Learning Framework for Solving Non-Markovian Optimal Control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhang%2C+X">Xiaole Zhang</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+P">Peiyu Zhang</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+X">Xiongye Xiao</a>, <a href="/search/eess?searchtype=author&query=Li%2C+S">Shixuan Li</a>, <a href="/search/eess?searchtype=author&query=Tzoumas%2C+V">Vasileios Tzoumas</a>, <a href="/search/eess?searchtype=author&query=Gupta%2C+V">Vijay Gupta</a>, <a href="/search/eess?searchtype=author&query=Bogdan%2C+P">Paul Bogdan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04649v3-abstract-short" style="display: inline;"> Integer-order calculus often falls short in capturing the long-range dependencies and memory effects found in many real-world processes. Fractional calculus addresses these gaps via fractional-order integrals and derivatives, but fractional-order dynamical systems pose substantial challenges in system identification and optimal control due to the lack of standard control methodologies. In this pap… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04649v3-abstract-full').style.display = 'inline'; document.getElementById('2502.04649v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04649v3-abstract-full" style="display: none;"> Integer-order calculus often falls short in capturing the long-range dependencies and memory effects found in many real-world processes. 
Fractional calculus addresses these gaps via fractional-order integrals and derivatives, but fractional-order dynamical systems pose substantial challenges in system identification and optimal control due to the lack of standard control methodologies. In this paper, we theoretically derive the optimal control via linear quadratic regulator (LQR) for fractional-order linear time-invariant (FOLTI) systems and develop an end-to-end deep learning framework based on this theoretical foundation. Our approach establishes a rigorous mathematical model, derives analytical solutions, and incorporates deep learning to achieve data-driven optimal control of FOLTI systems. Our key contributions include: (i) proposing an innovative system identification method control strategy for FOLTI systems, (ii) developing the first end-to-end data-driven learning framework, Fractional-Order Learning for Optimal Control (FOLOC), that learns control policies from observed trajectories, and (iii) deriving a theoretical analysis of sample complexity to quantify the number of samples required for accurate optimal control in complex real-world problems. Experimental results indicate that our method accurately approximates fractional-order system behaviors without relying on Gaussian noise assumptions, pointing to promising avenues for advanced optimal control. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04649v3-abstract-full').style.display = 'none'; document.getElementById('2502.04649v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03497">arXiv:2502.03497</a> <span> [<a href="https://arxiv.org/pdf/2502.03497">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> SLCGC: A lightweight Self-supervised Low-pass Contrastive Graph Clustering Network for Hyperspectral Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Ding%2C+Y">Yao Ding</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Z">Zhili Zhang</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+A">Aitao Yang</a>, <a href="/search/eess?searchtype=author&query=Cai%2C+Y">Yaoming Cai</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+X">Xiongwu Xiao</a>, <a href="/search/eess?searchtype=author&query=Hong%2C+D">Danfeng Hong</a>, <a href="/search/eess?searchtype=author&query=Yuan%2C+J">Junsong Yuan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03497v2-abstract-short" style="display: inline;"> Self-supervised hyperspectral image (HSI) clustering remains a fundamental yet challenging task due to the absence of labeled data and the inherent complexity of spatial-spectral interactions. 
While recent advancements have explored innovative approaches, existing methods face critical limitations in clustering accuracy, feature discriminability, computational efficiency, and robustness to noise,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03497v2-abstract-full').style.display = 'inline'; document.getElementById('2502.03497v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03497v2-abstract-full" style="display: none;"> Self-supervised hyperspectral image (HSI) clustering remains a fundamental yet challenging task due to the absence of labeled data and the inherent complexity of spatial-spectral interactions. While recent advancements have explored innovative approaches, existing methods face critical limitations in clustering accuracy, feature discriminability, computational efficiency, and robustness to noise, hindering their practical deployment. In this paper, a self-supervised efficient low-pass contrastive graph clustering (SLCGC) is introduced for HSIs. Our approach begins with homogeneous region generation, which aggregates pixels into spectrally consistent regions to preserve local spatial-spectral coherence while drastically reducing graph complexity. We then construct a structural graph using an adjacency matrix A and introduce a low-pass graph denoising mechanism to suppress high-frequency noise in the graph topology, ensuring stable feature propagation. A dual-branch graph contrastive learning module is developed, where Gaussian noise perturbations generate augmented views through two multilayer perceptrons (MLPs), and a cross-view contrastive loss enforces structural consistency between views to learn noise-invariant representations. Finally, latent embeddings optimized by this process are clustered via K-means. 
Extensive experiments and repeated comparative analysis have verified that our SLCGC contains high clustering accuracy, low computational complexity, and strong robustness. The code source will be available at https://github.com/DY-HYX. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03497v2-abstract-full').style.display = 'none'; document.getElementById('2502.03497v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 9 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02683">arXiv:2502.02683</a> <span> [<a href="https://arxiv.org/pdf/2502.02683">pdf</a>, <a href="https://arxiv.org/format/2502.02683">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Streaming Speaker Change Detection and Gender Classification for Transducer-Based Multi-Talker Speech Translation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/eess?searchtype=author&query=Wang%2C+P">Peidong Wang</a>, <a href="/search/eess?searchtype=author&query=Kanda%2C+N">Naoyuki Kanda</a>, <a href="/search/eess?searchtype=author&query=Xue%2C+J">Jian Xue</a>, <a href="/search/eess?searchtype=author&query=Li%2C+J">Jinyu Li</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+X">Xiaofei Wang</a>, <a href="/search/eess?searchtype=author&query=Subramanian%2C+A+S">Aswin Shanmugam Subramanian</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+J">Junkun Chen</a>, <a href="/search/eess?searchtype=author&query=Sivasankaran%2C+S">Sunit Sivasankaran</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+X">Xiong Xiao</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+Y">Yong Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02683v1-abstract-short" style="display: inline;"> Streaming multi-talker speech translation is a task that involves not only generating accurate and fluent translations with low latency but also recognizing when a speaker change occurs and what the speaker's gender is. Speaker change information can be used to create audio prompts for a zero-shot text-to-speech system, and gender can help to select speaker profiles in a conventional text-to-speec… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02683v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02683v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02683v1-abstract-full" style="display: none;"> Streaming multi-talker speech translation is a task that involves not only generating accurate and fluent translations with low latency but also recognizing when a speaker change occurs and what the speaker's gender is. 
Speaker change information can be used to create audio prompts for a zero-shot text-to-speech system, and gender can help to select speaker profiles in a conventional text-to-speech model. We propose to tackle streaming speaker change detection and gender classification by incorporating speaker embeddings into a transducer-based streaming end-to-end speech translation model. Our experiments demonstrate that the proposed methods can achieve high accuracy for both speaker change detection and gender classification. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02683v1-abstract-full').style.display = 'none'; document.getElementById('2502.02683v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.07041">arXiv:2501.07041</a> <span> [<a href="https://arxiv.org/pdf/2501.07041">pdf</a>, <a href="https://arxiv.org/format/2501.07041">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Beam Structured Turbo Receiver for HF Skywave Massive MIMO </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Song%2C+L">Linfeng Song</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+D">Ding Shi</a>, <a href="/search/eess?searchtype=author&query=Gao%2C+X">Xiqi Gao</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G+Y">Geoffrey Ye Li</a>, <a 
href="/search/eess?searchtype=author&query=Xia%2C+X">Xiang-Gen Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.07041v1-abstract-short" style="display: inline;"> In this paper, we investigate receiver design for high frequency (HF) skywave massive multiple-input multiple-output (MIMO) communications. We first establish a modified beam based channel model (BBCM) by performing uniform sampling for directional cosine with deterministic sampling interval, where the beam matrix is constructed using a phase-shifted discrete Fourier transform (DFT) matrix. Based… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.07041v1-abstract-full').style.display = 'inline'; document.getElementById('2501.07041v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.07041v1-abstract-full" style="display: none;"> In this paper, we investigate receiver design for high frequency (HF) skywave massive multiple-input multiple-output (MIMO) communications. We first establish a modified beam based channel model (BBCM) by performing uniform sampling for directional cosine with deterministic sampling interval, where the beam matrix is constructed using a phase-shifted discrete Fourier transform (DFT) matrix. Based on the modified BBCM, we propose a beam structured turbo receiver (BSTR) involving low-dimensional beam domain signal detection for grouped user terminals (UTs), which is proved to be asymptotically optimal in terms of minimizing mean-squared error (MSE). Moreover, we extend it to windowed BSTR by introducing a windowing approach for interference suppression and complexity reduction, and propose a well-designed energy-focusing window. 
We also present an efficient implementation of the windowed BSTR by exploiting the structure properties of the beam matrix and the beam domain channel sparsity. Simulation results validate the superior performance of the proposed receivers but with remarkably low complexity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.07041v1-abstract-full').style.display = 'none'; document.getElementById('2501.07041v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.03526">arXiv:2501.03526</a> <span> [<a href="https://arxiv.org/pdf/2501.03526">pdf</a>, <a href="https://arxiv.org/format/2501.03526">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> FgC2F-UDiff: Frequency-guided and Coarse-to-fine Unified Diffusion Model for Multi-modality Missing MRI Synthesis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xiao%2C+X">Xiaojiao Xiao</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+Q+V">Qinmin Vivian Hu</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+G">Guanghui Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2501.03526v1-abstract-short" style="display: inline;"> Multi-modality magnetic resonance imaging (MRI) is essential for the diagnosis and treatment of brain tumors. However, missing modalities are commonly observed due to limitations in scan time, scan corruption, artifacts, motion, and contrast agent intolerance. Synthesis of missing MRI has been a means to address the limitations of modality insufficiency in clinical practice and research. However,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.03526v1-abstract-full').style.display = 'inline'; document.getElementById('2501.03526v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.03526v1-abstract-full" style="display: none;"> Multi-modality magnetic resonance imaging (MRI) is essential for the diagnosis and treatment of brain tumors. However, missing modalities are commonly observed due to limitations in scan time, scan corruption, artifacts, motion, and contrast agent intolerance. Synthesis of missing MRI has been a means to address the limitations of modality insufficiency in clinical practice and research. However, there are still some challenges, such as poor generalization, inaccurate non-linear mapping, and slow processing speeds. To address the aforementioned issues, we propose a novel unified synthesis model, the Frequency-guided and Coarse-to-fine Unified Diffusion Model (FgC2F-UDiff), designed for multiple inputs and outputs. Specifically, the Coarse-to-fine Unified Network (CUN) fully exploits the iterative denoising properties of diffusion models, from global to detail, by dividing the denoising process into two stages, coarse and fine, to enhance the fidelity of synthesized images. 
Secondly, the Frequency-guided Collaborative Strategy (FCS) harnesses appropriate frequency information as prior knowledge to guide the learning of a unified, highly non-linear mapping. Thirdly, the Specific-acceleration Hybrid Mechanism (SHM) integrates specific mechanisms to accelerate the diffusion model and enhance the feasibility of many-to-many synthesis. Extensive experimental evaluations have demonstrated that our proposed FgC2F-UDiff model achieves superior performance on two datasets, validated through a comprehensive assessment that includes both qualitative observations and quantitative metrics, such as PSNR SSIM, LPIPS, and FID. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.03526v1-abstract-full').style.display = 'none'; document.getElementById('2501.03526v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Computational Imaging, 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.00641">arXiv:2501.00641</a> <span> [<a href="https://arxiv.org/pdf/2501.00641">pdf</a>, <a href="https://arxiv.org/ps/2501.00641">ps</a>, <a href="https://arxiv.org/format/2501.00641">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Rethink Delay Doppler Channels and Time-Frequency Coding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xiang-Gen Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.00641v2-abstract-short" style="display: inline;"> In this paper, we rethink delay Doppler channels (also called doubly selective channels). We prove that no modulation schemes (including the current active VOFDM/OTFS) can compensate a non-trivial Doppler spread well. We then discuss some of the existing methods to deal with time-varying channels, in particular time-frequency (TF) coding in an OFDM system. 
TF coding is equivalent to space-time cod… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.00641v2-abstract-full').style.display = 'inline'; document.getElementById('2501.00641v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.00641v2-abstract-full" style="display: none;"> In this paper, we rethink delay Doppler channels (also called doubly selective channels). We prove that no modulation schemes (including the current active VOFDM/OTFS) can compensate a non-trivial Doppler spread well. We then discuss some of the existing methods to deal with time-varying channels, in particular time-frequency (TF) coding in an OFDM system. TF coding is equivalent to space-time coding in the math part. We also summarize state of the art on space-time coding that was an active research topic over a decade ago. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.00641v2-abstract-full').style.display = 'none'; document.getElementById('2501.00641v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.20885">arXiv:2412.20885</a> <span> [<a href="https://arxiv.org/pdf/2412.20885">pdf</a>, <a href="https://arxiv.org/ps/2412.20885">ps</a>, <a href="https://arxiv.org/format/2412.20885">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> CF-CGN: Channel Fingerprints Extrapolation for Multi-band Massive MIMO Transmission based on Cycle-Consistent Generative Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xie%2C+C">Chenjie Xie</a>, <a href="/search/eess?searchtype=author&query=You%2C+L">Li You</a>, <a href="/search/eess?searchtype=author&query=Jin%2C+Z">Zhenzhou Jin</a>, <a href="/search/eess?searchtype=author&query=Tang%2C+J">Jinke Tang</a>, <a href="/search/eess?searchtype=author&query=Gao%2C+X">Xiqi Gao</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xiang-Gen Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.20885v1-abstract-short" style="display: inline;"> Multi-band massive multiple-input multiple-output (MIMO) communication can promote the cooperation of licensed and unlicensed spectra, effectively enhancing spectrum efficiency for Wi-Fi and other wireless systems. 
As an enabler for multi-band transmission, channel fingerprints (CF), also known as the channel knowledge map or radio environment map, are used to assist channel state information (CSI… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.20885v1-abstract-full').style.display = 'inline'; document.getElementById('2412.20885v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.20885v1-abstract-full" style="display: none;"> Multi-band massive multiple-input multiple-output (MIMO) communication can promote the cooperation of licensed and unlicensed spectra, effectively enhancing spectrum efficiency for Wi-Fi and other wireless systems. As an enabler for multi-band transmission, channel fingerprints (CF), also known as the channel knowledge map or radio environment map, are used to assist channel state information (CSI) acquisition and reduce computational complexity. In this paper, we propose CF-CGN (Channel Fingerprints with Cycle-consistent Generative Networks) to extrapolate CF for multi-band massive MIMO transmission where licensed and unlicensed spectra cooperate to provide ubiquitous connectivity. Specifically, we first model CF as a multichannel image and transform the extrapolation problem into an image translation task, which converts CF from one frequency to another by exploring the shared characteristics of statistical CSI in the beam domain. Then, paired generative networks are designed and coupled by variable-weight cycle consistency losses to fit the reciprocal relationship at different bands. Matched with the coupled networks, a joint training strategy is developed accordingly, supporting synchronous optimization of all trainable parameters. During the inference process, we also introduce a refining scheme to improve the extrapolation accuracy based on the resolution of CF. 
Numerical results illustrate that our proposed CF-CGN can achieve bidirectional extrapolation with an error of 5-17 dB lower than the benchmarks in different communication scenarios, demonstrating its excellent generalization ability. We further show that the sum rate performance assisted by CF-CGN-based CF is close to that with perfect CSI for multi-band massive MIMO transmission. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.20885v1-abstract-full').style.display = 'none'; document.getElementById('2412.20885v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 12 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.18281">arXiv:2412.18281</a> <span> [<a href="https://arxiv.org/pdf/2412.18281">pdf</a>, <a href="https://arxiv.org/format/2412.18281">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> GDM4MMIMO: Generative Diffusion Models for Massive MIMO Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Jin%2C+Z">Zhenzhou Jin</a>, <a href="/search/eess?searchtype=author&query=You%2C+L">Li You</a>, <a 
href="/search/eess?searchtype=author&query=Zhou%2C+H">Huibin Zhou</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yuanshuo Wang</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+X">Xiaofeng Liu</a>, <a href="/search/eess?searchtype=author&query=Gong%2C+X">Xinrui Gong</a>, <a href="/search/eess?searchtype=author&query=Gao%2C+X">Xiqi Gao</a>, <a href="/search/eess?searchtype=author&query=Ng%2C+D+W+K">Derrick Wing Kwan Ng</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xiang-Gen Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.18281v1-abstract-short" style="display: inline;"> Massive multiple-input multiple-output (MIMO) offers significant advantages in spectral and energy efficiencies, positioning it as a cornerstone technology of fifth-generation (5G) wireless communication systems and a promising solution for the burgeoning data demands anticipated in sixth-generation (6G) networks. In recent years, with the continuous advancement of artificial intelligence (AI), a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18281v1-abstract-full').style.display = 'inline'; document.getElementById('2412.18281v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.18281v1-abstract-full" style="display: none;"> Massive multiple-input multiple-output (MIMO) offers significant advantages in spectral and energy efficiencies, positioning it as a cornerstone technology of fifth-generation (5G) wireless communication systems and a promising solution for the burgeoning data demands anticipated in sixth-generation (6G) networks. 
In recent years, with the continuous advancement of artificial intelligence (AI), a multitude of task-oriented generative foundation models (GFMs) have emerged, achieving remarkable performance in various fields such as computer vision (CV), natural language processing (NLP), and autonomous driving. As a pioneering force, these models are driving the paradigm shift in AI towards generative AI (GenAI). Among them, the generative diffusion model (GDM), as one of the state-of-the-art families of generative models, demonstrates an exceptional capability to learn implicit prior knowledge and robust generalization capabilities, thereby enhancing its versatility and effectiveness across diverse applications. In this paper, we delve into the potential applications of GDM in massive MIMO communications. Specifically, we first provide an overview of massive MIMO communication, the framework of GFMs, and the working mechanism of GDM. Following this, we discuss recent research advancements in the field and present a case study of near-field channel estimation based on GDM, demonstrating its promising potential for facilitating efficient ultra-dimensional channel state information (CSI) acquisition in the context of massive MIMO communications. Finally, we highlight several pressing challenges in future mobile communications and identify promising research directions surrounding GDM. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18281v1-abstract-full').style.display = 'none'; document.getElementById('2412.18281v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.12531">arXiv:2412.12531</a> <span> [<a href="https://arxiv.org/pdf/2412.12531">pdf</a>, <a href="https://arxiv.org/ps/2412.12531">ps</a>, <a href="https://arxiv.org/format/2412.12531">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Movable Antenna Aided NOMA: Joint Antenna Positioning, Precoding, and Decoding Design </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xiao%2C+Z">Zhenyu Xiao</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Z">Zhe Li</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+L">Lipeng Zhu</a>, <a href="/search/eess?searchtype=author&query=Ning%2C+B">Boyu Ning</a>, <a href="/search/eess?searchtype=author&query=da+Costa%2C+D+B">Daniel Benevides da Costa</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xiang-Gen Xia</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+R">Rui Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.12531v1-abstract-short" style="display: inline;"> This paper investigates movable antenna (MA) aided non-orthogonal multiple access (NOMA) for multi-user downlink communication, where the base station (BS) is equipped with a fixed-position antenna (FPA) array to serve multiple MA-enabled users. 
An optimization problem is formulated to maximize the minimum achievable rate among all the users by jointly optimizing the MA positioning of each user, t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.12531v1-abstract-full').style.display = 'inline'; document.getElementById('2412.12531v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.12531v1-abstract-full" style="display: none;"> This paper investigates movable antenna (MA) aided non-orthogonal multiple access (NOMA) for multi-user downlink communication, where the base station (BS) is equipped with a fixed-position antenna (FPA) array to serve multiple MA-enabled users. An optimization problem is formulated to maximize the minimum achievable rate among all the users by jointly optimizing the MA positioning of each user, the precoding matrix at the BS, and the successive interference cancellation (SIC) decoding indicator matrix at the users, subject to a set of constraints including the limited movement area of the MAs, the maximum transmit power of the BS, and the SIC decoding condition. To solve this non-convex problem, we propose a two-loop iterative optimization algorithm that combines the hippopotamus optimization (HO) method with the alternating optimization (AO) method to obtain a suboptimal solution efficiently. Specifically, in the inner loop, the complex-valued precoding matrix and the binary decoding indicator matrix are optimized alternatively by the successive convex approximation (SCA) technique with customized greedy search to maximize the minimum achievable rate for the given positions of the MAs. In the outer loop, each user's antenna position is updated using the HO algorithm, following a novel nature-inspired intelligent optimization framework. 
Simulation results show that the proposed algorithms can effectively avoid local optimum for highly coupled variables and significantly improve the rate performance of the NOMA system compared to the conventional FPA system as well as other benchmark schemes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.12531v1-abstract-full').style.display = 'none'; document.getElementById('2412.12531v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.12126">arXiv:2412.12126</a> <span> [<a href="https://arxiv.org/pdf/2412.12126">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Seamless Optical Cloud Computing across Edge-Metro Network for Generative AI </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xing%2C+S">Sizhe Xing</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+A">Aolong Sun</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+C">Chengxi Wang</a>, <a 
href="/search/eess?searchtype=author&query=Wang%2C+Y">Yizhi Wang</a>, <a href="/search/eess?searchtype=author&query=Dong%2C+B">Boyu Dong</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+J">Junhui Hu</a>, <a href="/search/eess?searchtype=author&query=Deng%2C+X">Xuyu Deng</a>, <a href="/search/eess?searchtype=author&query=Yan%2C+A">An Yan</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Y">Yingjun Liu</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+F">Fangchen Hu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Z">Zhongya Li</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+O">Ouhan Huang</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+J">Junhao Zhao</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+Y">Yingjun Zhou</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Z">Ziwei Li</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+J">Jianyang Shi</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+X">Xi Xiao</a>, <a href="/search/eess?searchtype=author&query=Penty%2C+R">Richard Penty</a>, <a href="/search/eess?searchtype=author&query=Cheng%2C+Q">Qixiang Cheng</a>, <a href="/search/eess?searchtype=author&query=Chi%2C+N">Nan Chi</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+J">Junwen Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.12126v1-abstract-short" style="display: inline;"> The rapid advancement of generative artificial intelligence (AI) in recent years has profoundly reshaped modern lifestyles, necessitating a revolutionary architecture to support the growing demands for computational power. Cloud computing has become the driving force behind this transformation. 
However, it consumes significant power and faces computation security risks due to the reliance on exten… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.12126v1-abstract-full').style.display = 'inline'; document.getElementById('2412.12126v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.12126v1-abstract-full" style="display: none;"> The rapid advancement of generative artificial intelligence (AI) in recent years has profoundly reshaped modern lifestyles, necessitating a revolutionary architecture to support the growing demands for computational power. Cloud computing has become the driving force behind this transformation. However, it consumes significant power and faces computation security risks due to the reliance on extensive data centers and servers in the cloud. Reducing power consumption while enhancing computational scale remains a persistent challenge in cloud computing. Here, we propose and experimentally demonstrate an optical cloud computing system that can be seamlessly deployed across an edge-metro network. By modulating inputs and models into light, a wide range of edge nodes can directly access the optical computing center via the edge-metro network. The experimental validations show an energy efficiency of 118.6 mW/TOPs (tera operations per second), reducing energy consumption by two orders of magnitude compared to traditional electronic-based cloud computing solutions. Furthermore, it is experimentally validated that this architecture can perform various complex generative AI models through parallel computing to achieve image generation tasks. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.12126v1-abstract-full').style.display = 'none'; document.getElementById('2412.12126v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.10736">arXiv:2412.10736</a> <span> [<a href="https://arxiv.org/pdf/2412.10736">pdf</a>, <a href="https://arxiv.org/format/2412.10736">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> 6D Movable Antenna Enhanced Multi-Access Point Coordination via Position and Orientation Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Pi%2C+X">Xiangyu Pi</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+L">Lipeng Zhu</a>, <a href="/search/eess?searchtype=author&query=Mao%2C+H">Haobin Mao</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+Z">Zhenyu Xiao</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xiang-Gen Xia</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+R">Rui Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.10736v1-abstract-short" style="display: inline;"> The effective utilization of unlicensed spectrum is regarded as an important direction to enable the massive access and broad coverage for next-generation wireless local area network (WLAN). 
Due to the crowded spectrum occupancy and dense user terminals (UTs), the conventional fixed antenna (FA)-based access points (APs) face huge challenges in realizing massive access and interference cancellatio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.10736v1-abstract-full').style.display = 'inline'; document.getElementById('2412.10736v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.10736v1-abstract-full" style="display: none;"> The effective utilization of unlicensed spectrum is regarded as an important direction to enable the massive access and broad coverage for next-generation wireless local area network (WLAN). Due to the crowded spectrum occupancy and dense user terminals (UTs), the conventional fixed antenna (FA)-based access points (APs) face huge challenges in realizing massive access and interference cancellation. To address this issue, in this paper we develop a six-dimensional movable antenna (6DMA) enhanced multi-AP coordination system for coverage enhancement and interference mitigation. First, we model the wireless channels between the APs and UTs to characterize their variation with respect to 6DMA movement, in terms of both the three-dimensional (3D) position and 3D orientation of each distributed AP's antenna. Then, an optimization problem is formulated to maximize the weighted sum rate of multiple UTs for their uplink transmissions by jointly optimizing the antenna position vector (APV), the antenna orientation matrix (AOM), and the receive combining matrix over all coordinated APs, subject to the constraints on local antenna movement regions. To solve this challenging non-convex optimization problem, we first transform it into a more tractable Lagrangian dual problem. 
Then, an alternating optimization (AO)-based algorithm is developed by iteratively optimizing the APV and AOM, which are designed by applying the successive convex approximation (SCA) technique and Riemannian manifold optimization-based algorithm, respectively. Simulation results show that the proposed 6DMA-enhanced multi-AP coordination system can significantly enhance network capacity, and both of the online and offline 6DMA schemes can attain considerable performance improvement compared to the conventional FA-based schemes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.10736v1-abstract-full').style.display = 'none'; document.getElementById('2412.10736v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 9 figures, submitted to an IEEE journal for possible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.08278">arXiv:2412.08278</a> <span> [<a href="https://arxiv.org/pdf/2412.08278">pdf</a>, <a href="https://arxiv.org/ps/2412.08278">ps</a>, <a href="https://arxiv.org/format/2412.08278">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Toward Near-Globally Optimal Nonlinear Model Predictive Control via Diffusion Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Huang%2C+T">Tzu-Yuan Huang</a>, <a 
href="/search/eess?searchtype=author&query=Lederer%2C+A">Armin Lederer</a>, <a href="/search/eess?searchtype=author&query=Hoischen%2C+N">Nicolas Hoischen</a>, <a href="/search/eess?searchtype=author&query=Br%C3%BCdigam%2C+J">Jan Brüdigam</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+X">Xuehua Xiao</a>, <a href="/search/eess?searchtype=author&query=Sosnowski%2C+S">Stefan Sosnowski</a>, <a href="/search/eess?searchtype=author&query=Hirche%2C+S">Sandra Hirche</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.08278v1-abstract-short" style="display: inline;"> Achieving global optimality in nonlinear model predictive control (NMPC) is challenging due to the non-convex nature of the underlying optimization problem. Since commonly employed local optimization techniques depend on carefully chosen initial guesses, this non-convexity often leads to suboptimal performance resulting from local optima. To overcome this limitation, we propose a novel diffusion m… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.08278v1-abstract-full').style.display = 'inline'; document.getElementById('2412.08278v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.08278v1-abstract-full" style="display: none;"> Achieving global optimality in nonlinear model predictive control (NMPC) is challenging due to the non-convex nature of the underlying optimization problem. Since commonly employed local optimization techniques depend on carefully chosen initial guesses, this non-convexity often leads to suboptimal performance resulting from local optima. To overcome this limitation, we propose a novel diffusion model-based approach for near-globally optimal NMPC consisting of an offline and an online phase. 
The offline phase employs a local optimizer to sample from the distribution of optimal NMPC control sequences along generated system trajectories through random initial guesses. Subsequently, the generated diverse data set is used to train a diffusion model to reflect the multi-modal distribution of optima. In the online phase, the trained model is leveraged to efficiently perform a variant of random shooting optimization to obtain near-globally optimal control sequences without relying on any initial guesses or online NMPC solving. The effectiveness of our approach is illustrated in a numerical simulation indicating high performance benefits compared to direct neural network approximations of NMPC and significantly lower computation times than online solving NMPC using global optimizers. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.08278v1-abstract-full').style.display = 'none'; document.getElementById('2412.08278v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.02655">arXiv:2412.02655</a> <span> [<a href="https://arxiv.org/pdf/2412.02655">pdf</a>, <a href="https://arxiv.org/format/2412.02655">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> LLM-Enhanced Path Planning: Safe and Efficient Autonomous Navigation with Instructional Inputs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Doma%2C+P">Pranav Doma</a>, <a href="/search/eess?searchtype=author&query=Arab%2C+A">Aliasghar Arab</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+X">Xuesu Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.02655v1-abstract-short" style="display: inline;"> Autonomous navigation guided by natural language instructions is essential for improving human-robot interaction and enabling complex operations in dynamic environments. While large language models (LLMs) are not inherently designed for planning, they can significantly enhance planning efficiency by providing guidance and informing constraints to ensure safety. 
This paper introduces a planning fra… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.02655v1-abstract-full').style.display = 'inline'; document.getElementById('2412.02655v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.02655v1-abstract-full" style="display: none;"> Autonomous navigation guided by natural language instructions is essential for improving human-robot interaction and enabling complex operations in dynamic environments. While large language models (LLMs) are not inherently designed for planning, they can significantly enhance planning efficiency by providing guidance and informing constraints to ensure safety. This paper introduces a planning framework that integrates LLMs with 2D occupancy grid maps and natural language commands to improve spatial reasoning and task execution in resource-limited settings. By decomposing high-level commands and real-time environmental data, the system generates structured navigation plans for pick-and-place tasks, including obstacle avoidance, goal prioritization, and adaptive behaviors. The framework dynamically recalculates paths to address environmental changes and aligns with implicit social norms for seamless human-robot interaction. Our results demonstrate the potential of LLMs to design context-aware systems to enhance navigation efficiency and safety in industrial and dynamic environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.02655v1-abstract-full').style.display = 'none'; document.getElementById('2412.02655v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.09436">arXiv:2410.09436</a> <span> [<a href="https://arxiv.org/pdf/2410.09436">pdf</a>, <a href="https://arxiv.org/ps/2410.09436">ps</a>, <a href="https://arxiv.org/format/2410.09436">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Sum Rate Maximization for Movable Antenna Enhanced Multiuser Covert Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Mao%2C+H">Haobin Mao</a>, <a href="/search/eess?searchtype=author&query=Pi%2C+X">Xiangyu Pi</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+L">Lipeng Zhu</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+Z">Zhenyu Xiao</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xiang-Gen Xia</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+R">Rui Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.09436v2-abstract-short" style="display: inline;"> In this letter, we propose to employ movable antenna (MA) to enhance covert communications with noise uncertainty, where the confidential data is transmitted from an MA-aided access point (AP) to multiple users with a warden attempting to detect the existence of the legal transmission. 
To maximize the sum rate of users under covertness constraint, we formulate an optimization problem to jointly de… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09436v2-abstract-full').style.display = 'inline'; document.getElementById('2410.09436v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.09436v2-abstract-full" style="display: none;"> In this letter, we propose to employ movable antenna (MA) to enhance covert communications with noise uncertainty, where the confidential data is transmitted from an MA-aided access point (AP) to multiple users with a warden attempting to detect the existence of the legal transmission. To maximize the sum rate of users under covertness constraint, we formulate an optimization problem to jointly design the transmit beamforming and the positions of MAs at the AP. To solve the formulated non-convex optimization problem, we develop a block successive upper-bound minimization (BSUM) based algorithm, where the proximal distance algorithm (PDA) and the successive convex approximation (SCA) are employed to optimize the transmit beamforming and the MAs' positions, respectively. Simulation results show that the proposed MAs-aided system can significantly increase the covert sum rate via antenna position optimization as compared to conventional systems with fixed-position antennas (FPAs). 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09436v2-abstract-full').style.display = 'none'; document.getElementById('2410.09436v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 5 figures (subfigures included), submitted to an IEEE journal for possible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.03559">arXiv:2410.03559</a> <span> [<a href="https://arxiv.org/pdf/2410.03559">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> </div> </div> <p class="title is-5 mathjax"> Optimizing food taste sensory evaluation through neural network-based taste electroencephalogram channel selection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xiuxin Xia</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Q">Qun Wang</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+H">He Wang</a>, <a 
href="/search/eess?searchtype=author&query=Liu%2C+C">Chenrui Liu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+P">Pengwei Li</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+Y">Yan Shi</a>, <a href="/search/eess?searchtype=author&query=Men%2C+H">Hong Men</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.03559v1-abstract-short" style="display: inline;"> The taste electroencephalogram (EEG) evoked by the taste stimulation can reflect different brain patterns and be used in applications such as sensory evaluation of food. However, considering the computational cost and efficiency, EEG data with many channels has to face the critical issue of channel selection. This paper proposed a channel selection method called class activation mapping with atten… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03559v1-abstract-full').style.display = 'inline'; document.getElementById('2410.03559v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.03559v1-abstract-full" style="display: none;"> The taste electroencephalogram (EEG) evoked by the taste stimulation can reflect different brain patterns and be used in applications such as sensory evaluation of food. However, considering the computational cost and efficiency, EEG data with many channels has to face the critical issue of channel selection. This paper proposed a channel selection method called class activation mapping with attention (CAM-Attention). The CAM-Attention method combined a convolutional neural network with channel and spatial attention (CNN-CSA) model with a gradient-weighted class activation mapping (Grad-CAM) model. 
The CNN-CSA model exploited key features in EEG data by attention mechanism, and the Grad-CAM model effectively realized the visualization of feature regions. Then, channel selection was effectively implemented based on feature regions. Finally, the CAM-Attention method reduced the computational burden of taste EEG recognition and effectively distinguished the four tastes. In short, it has excellent recognition performance and provides effective technical support for taste sensory evaluation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03559v1-abstract-full').style.display = 'none'; document.getElementById('2410.03559v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">33 pages, 13 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.19346">arXiv:2409.19346</a> <span> [<a href="https://arxiv.org/pdf/2409.19346">pdf</a>, <a href="https://arxiv.org/ps/2409.19346">ps</a>, <a href="https://arxiv.org/format/2409.19346">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Channel Estimation for Movable Antenna Aided Wideband Communication Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xiao%2C+Z">Zhenyu Xiao</a>, <a href="/search/eess?searchtype=author&query=Cao%2C+S">Songqi Cao</a>, <a 
href="/search/eess?searchtype=author&query=Zhu%2C+L">Lipeng Zhu</a>, <a href="/search/eess?searchtype=author&query=Ning%2C+B">Boyu Ning</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xiang-Gen Xia</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+R">Rui Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.19346v1-abstract-short" style="display: inline;"> Movable antenna (MA) is an emerging technology that can significantly improve communication performance via the continuous adjustment of the antenna positions. To unleash the potential of MAs in wideband communication systems, acquiring accurate channel state information (CSI), i.e., the channel frequency responses (CFRs) between any position pair within the transmit (Tx) region and the receive (R… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19346v1-abstract-full').style.display = 'inline'; document.getElementById('2409.19346v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.19346v1-abstract-full" style="display: none;"> Movable antenna (MA) is an emerging technology that can significantly improve communication performance via the continuous adjustment of the antenna positions. To unleash the potential of MAs in wideband communication systems, acquiring accurate channel state information (CSI), i.e., the channel frequency responses (CFRs) between any position pair within the transmit (Tx) region and the receive (Rx) region across all subcarriers, is a crucial issue. In this paper, we study the channel estimation problem for wideband MA systems. 
To start with, we express the CFRs as a combination of the field-response vectors (FRVs), delay-response vector (DRV), and path-response tensor (PRT), which exhibit sparse characteristics and can be recovered by using a limited number of channel measurements at selected position pairs of Tx and Rx MAs over a few subcarriers. Specifically, we first formulate the recovery of the FRVs and DRV as a problem with multiple measurement vectors in compressed sensing (MMV-CS), which can be solved via a simultaneous orthogonal matching pursuit (SOMP) algorithm. Next, we estimate the PRT using the least-square (LS) method. Moreover, we also devise an alternating refinement approach to further improve the accuracy of the estimated FRVs, DRV, and PRT. This is achieved by minimizing the discrepancy between the received pilots and those constructed by the estimated CSI, which can be efficiently carried out by using the gradient descent algorithm. Finally, simulation results demonstrate that both the SOMP-based channel estimation method and alternating refinement method can reconstruct the complete wideband CSI with high accuracy, where the alternating refinement method performs better despite a higher complexity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19346v1-abstract-full').style.display = 'none'; document.getElementById('2409.19346v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.16301">arXiv:2409.16301</a> <span> [<a href="https://arxiv.org/pdf/2409.16301">pdf</a>, <a href="https://arxiv.org/format/2409.16301">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Gait Switching and Enhanced Stabilization of Walking Robots with Deep Learning-based Reachability: A Case Study on Two-link Walker </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xingpeng Xia</a>, <a href="/search/eess?searchtype=author&query=Choi%2C+J+J">Jason J. Choi</a>, <a href="/search/eess?searchtype=author&query=Agrawal%2C+A">Ayush Agrawal</a>, <a href="/search/eess?searchtype=author&query=Sreenath%2C+K">Koushil Sreenath</a>, <a href="/search/eess?searchtype=author&query=Tomlin%2C+C+J">Claire J. Tomlin</a>, <a href="/search/eess?searchtype=author&query=Bansal%2C+S">Somil Bansal</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.16301v1-abstract-short" style="display: inline;"> Learning-based approaches have recently shown notable success in legged locomotion. However, these approaches often lack accountability, necessitating empirical tests to determine their effectiveness. In this work, we are interested in designing a learning-based locomotion controller whose stability can be examined and guaranteed. 
This can be achieved by verifying regions of attraction (RoAs) of l… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.16301v1-abstract-full').style.display = 'inline'; document.getElementById('2409.16301v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.16301v1-abstract-full" style="display: none;"> Learning-based approaches have recently shown notable success in legged locomotion. However, these approaches often lack accountability, necessitating empirical tests to determine their effectiveness. In this work, we are interested in designing a learning-based locomotion controller whose stability can be examined and guaranteed. This can be achieved by verifying regions of attraction (RoAs) of legged robots to their stable walking gaits. This is a non-trivial problem for legged robots due to their hybrid dynamics. Although previous work has shown the utility of Hamilton-Jacobi (HJ) reachability to solve this problem, its practicality was limited by its poor scalability. The core contribution of our work is the employment of a deep learning-based HJ reachability solution to the hybrid legged robot dynamics, which overcomes the previous work's limitation. With the learned reachability solution, first, we can estimate a library of RoAs for various gaits. Second, we can design a one-step predictive controller that effectively stabilizes to an individual gait within the verified RoA. Finally, we can devise a strategy that switches gaits, in response to external perturbations, whose feasibility is guided by the RoA analysis. We demonstrate our method in a two-link walker simulation, whose mathematical model is well established. Our method achieves better stability than previous model-based methods, while ensuring transparency that was not present in the existing learning-based approaches. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.16301v1-abstract-full').style.display = 'none'; document.getElementById('2409.16301v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The first two authors contributed equally. This work is supported in part by the NSF Grant CMMI-1944722, the NSF CAREER Program under award 2240163, the NASA ULI on Safe Aviation Autonomy, and the DARPA Assured Autonomy and Assured Neuro Symbolic Learning and Reasoning (ANSR) programs. The work of Jason J. Choi received the support of a fellowship from Kwanjeong Educational Foundation, Korea</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.03005">arXiv:2409.03005</a> <span> [<a href="https://arxiv.org/pdf/2409.03005">pdf</a>, <a href="https://arxiv.org/format/2409.03005">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> PIETRA: Physics-Informed Evidential Learning for Traversing Out-of-Distribution Terrain </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Cai%2C+X">Xiaoyi Cai</a>, <a href="/search/eess?searchtype=author&query=Queeney%2C+J">James 
Queeney</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+T">Tong Xu</a>, <a href="/search/eess?searchtype=author&query=Datar%2C+A">Aniket Datar</a>, <a href="/search/eess?searchtype=author&query=Pan%2C+C">Chenhui Pan</a>, <a href="/search/eess?searchtype=author&query=Miller%2C+M">Max Miller</a>, <a href="/search/eess?searchtype=author&query=Flather%2C+A">Ashton Flather</a>, <a href="/search/eess?searchtype=author&query=Osteen%2C+P+R">Philip R. Osteen</a>, <a href="/search/eess?searchtype=author&query=Roy%2C+N">Nicholas Roy</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+X">Xuesu Xiao</a>, <a href="/search/eess?searchtype=author&query=How%2C+J+P">Jonathan P. How</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.03005v2-abstract-short" style="display: inline;"> Self-supervised learning is a powerful approach for developing traversability models for off-road navigation, but these models often struggle with inputs unseen during training. Existing methods utilize techniques like evidential deep learning to quantify model uncertainty, helping to identify and avoid out-of-distribution terrain. However, always avoiding out-of-distribution terrain can be overly… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.03005v2-abstract-full').style.display = 'inline'; document.getElementById('2409.03005v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.03005v2-abstract-full" style="display: none;"> Self-supervised learning is a powerful approach for developing traversability models for off-road navigation, but these models often struggle with inputs unseen during training. 
Existing methods utilize techniques like evidential deep learning to quantify model uncertainty, helping to identify and avoid out-of-distribution terrain. However, always avoiding out-of-distribution terrain can be overly conservative, e.g., when novel terrain can be effectively analyzed using a physics-based model. To overcome this challenge, we introduce Physics-Informed Evidential Traversability (PIETRA), a self-supervised learning framework that integrates physics priors directly into the mathematical formulation of evidential neural networks and introduces physics knowledge implicitly through an uncertainty-aware, physics-informed training loss. Our evidential network seamlessly transitions between learned and physics-based predictions for out-of-distribution inputs. Additionally, the physics-informed loss regularizes the learned model, ensuring better alignment with the physics model. Extensive simulations and hardware experiments demonstrate that PIETRA improves both learning accuracy and navigation performance in environments with significant distribution shifts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.03005v2-abstract-full').style.display = 'none'; document.getElementById('2409.03005v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in RA-L. 
Video: https://youtu.be/OTnNZ96oJRk</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.01695">arXiv:2409.01695</a> <span> [<a href="https://arxiv.org/pdf/2409.01695">pdf</a>, <a href="https://arxiv.org/format/2409.01695">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> USTC-KXDIGIT System Description for ASVspoof5 Challenge </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yihao Chen</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+H">Haochen Wu</a>, <a href="/search/eess?searchtype=author&query=Jiang%2C+N">Nan Jiang</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xiang Xia</a>, <a href="/search/eess?searchtype=author&query=Gu%2C+Q">Qing Gu</a>, <a href="/search/eess?searchtype=author&query=Hao%2C+Y">Yunqi Hao</a>, <a href="/search/eess?searchtype=author&query=Cai%2C+P">Pengfei Cai</a>, <a href="/search/eess?searchtype=author&query=Guan%2C+Y">Yu Guan</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+J">Jialong Wang</a>, <a href="/search/eess?searchtype=author&query=Xie%2C+W">Weilin Xie</a>, <a href="/search/eess?searchtype=author&query=Fang%2C+L">Lei Fang</a>, <a href="/search/eess?searchtype=author&query=Fang%2C+S">Sian Fang</a>, <a href="/search/eess?searchtype=author&query=Song%2C+Y">Yan Song</a>, <a href="/search/eess?searchtype=author&query=Guo%2C+W">Wu Guo</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+L">Lin Liu</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+M">Minqiang Xu</a> 
</p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.01695v1-abstract-short" style="display: inline;"> This paper describes the USTC-KXDIGIT system submitted to the ASVspoof5 Challenge for Track 1 (speech deepfake detection) and Track 2 (spoofing-robust automatic speaker verification, SASV). Track 1 showcases a diverse range of technical qualities from potential processing algorithms and includes both open and closed conditions. For these conditions, our system consists of a cascade of a frontend f… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01695v1-abstract-full').style.display = 'inline'; document.getElementById('2409.01695v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.01695v1-abstract-full" style="display: none;"> This paper describes the USTC-KXDIGIT system submitted to the ASVspoof5 Challenge for Track 1 (speech deepfake detection) and Track 2 (spoofing-robust automatic speaker verification, SASV). Track 1 showcases a diverse range of technical qualities from potential processing algorithms and includes both open and closed conditions. For these conditions, our system consists of a cascade of a frontend feature extractor and a back-end classifier. We focus on extensive embedding engineering and enhancing the generalization of the back-end classifier model. Specifically, the embedding engineering is based on hand-crafted features and speech representations from a self-supervised model, used for closed and open conditions, respectively. To detect spoof attacks under various adversarial conditions, we trained multiple systems on an augmented training set. Additionally, we used voice conversion technology to synthesize fake audio from genuine audio in the training set to enrich the synthesis algorithms. 
To leverage the complementary information learned by different model architectures, we employed activation ensemble and fused scores from different systems to obtain the final decision score for spoof detection. During the evaluation phase, the proposed methods achieved 0.3948 minDCF and 14.33% EER in the closed condition, and 0.0750 minDCF and 2.59% EER in the open condition, demonstrating the robustness of our submitted systems under adversarial conditions. In Track 2, we continued using the CM system from Track 1 and fused it with a CNN-based ASV system. This approach achieved 0.2814 min-aDCF in the closed condition and 0.0756 min-aDCF in the open condition, showcasing superior performance in the SASV system. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01695v1-abstract-full').style.display = 'none'; document.getElementById('2409.01695v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ASVspoof5 workshop paper</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.07264">arXiv:2408.07264</a> <span> [<a href="https://arxiv.org/pdf/2408.07264">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1002/ima.22933">10.1002/ima.22933 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Lesion-aware network for diabetic retinopathy diagnosis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xue Xia</a>, <a href="/search/eess?searchtype=author&query=Zhan%2C+K">Kun Zhan</a>, <a href="/search/eess?searchtype=author&query=Fang%2C+Y">Yuming Fang</a>, <a href="/search/eess?searchtype=author&query=Jiang%2C+W">Wenhui Jiang</a>, <a href="/search/eess?searchtype=author&query=Shen%2C+F">Fei Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.07264v1-abstract-short" style="display: inline;"> Deep learning brought boosts to auto diabetic retinopathy (DR) diagnosis, thus, greatly helping ophthalmologists for early disease detection, which contributes to preventing disease deterioration that may eventually lead to 
blindness. It has been proved that convolutional neural network (CNN)-aided lesion identifying or segmentation benefits auto DR screening. The key to fine-grained lesion tasks… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.07264v1-abstract-full').style.display = 'inline'; document.getElementById('2408.07264v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.07264v1-abstract-full" style="display: none;"> Deep learning brought boosts to auto diabetic retinopathy (DR) diagnosis, thus, greatly helping ophthalmologists for early disease detection, which contributes to preventing disease deterioration that may eventually lead to blindness. It has been proved that convolutional neural network (CNN)-aided lesion identifying or segmentation benefits auto DR screening. The key to fine-grained lesion tasks mainly lies in: (1) extracting features being both sensitive to tiny lesions and robust against DR-irrelevant interference, and (2) exploiting and re-using encoded information to restore lesion locations under extremely imbalanced data distribution. To this end, we propose a CNN-based DR diagnosis network with attention mechanism involved, termed lesion-aware network, to better capture lesion information from imbalanced data. Specifically, we design the lesion-aware module (LAM) to capture noise-like lesion areas across deeper layers, and the feature-preserve module (FPM) to assist shallow-to-deep feature fusion. Afterward, the proposed lesion-aware network (LANet) is constructed by embedding the LAM and FPM into the CNN decoders for DR-related information utilization. The proposed LANet is then further extended to a DR screening network by adding a classification layer. 
Through experiments on three public fundus datasets with pixel-level annotations, our method outperforms the mainstream methods with an area under curve of 0.967 in DR screening, and increases the overall average precision by 7.6%, 2.1%, and 1.2% in lesion segmentation on three datasets. Besides, the ablation study validates the effectiveness of the proposed sub-modules. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.07264v1-abstract-full').style.display = 'none'; document.getElementById('2408.07264v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This is the submitted version without improvements by reviewers. 
The final version is published in the International Journal of Imaging Systems and Technology (https://onlinelibrary.wiley.com/doi/10.1002/ima.22933)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.06588">arXiv:2408.06588</a> <span> [<a href="https://arxiv.org/pdf/2408.06588">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Orbital-Angular-Momentum Versus MIMO: Orthogonality, Degree of Freedom, and Capacity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Jing%2C+H">Haiyue Jing</a>, <a href="/search/eess?searchtype=author&query=Cheng%2C+W">Wenchi Cheng</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xiang-Gen Xia</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+H">Hailin Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.06588v1-abstract-short" style="display: inline;"> The plane wave based wireless communications have becoming more and more matured, along with the well utilization of the traditional resources such as time and frequency. To further increase the capacity for rapidly increasing capacity demand of wireless communications, it is potential to use the twist wave, which has the orbital angular momentum (OAM). 
In this paper, we discuss the OAM based wire… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.06588v1-abstract-full').style.display = 'inline'; document.getElementById('2408.06588v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.06588v1-abstract-full" style="display: none;"> The plane wave based wireless communications have becoming more and more matured, along with the well utilization of the traditional resources such as time and frequency. To further increase the capacity for rapidly increasing capacity demand of wireless communications, it is potential to use the twist wave, which has the orbital angular momentum (OAM). In this paper, we discuss the OAM based wireless communications in the aspect of orthogonality, degree of freedom (DoF), and capacity, where both the transmitter and the receiver use uniform circular array (UCA) antennas. In particular, we compare OAM based wireless communications with multiple-input-multiple-output (MIMO) based wireless communications in terms of DoF and capacity. Numerical results are presented to validate and evaluate that the DoF of OAM based wireless communications is greater than or equal to that of correlated MIMO based wireless communications when the transmitter and the receiver antennas are aligned well. The OAM based wireless communications can achieve larger capacity than the correlated MIMO in high signal-to-noise ratio (SNR) region under line-of-sight scenario. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.06588v1-abstract-full').style.display = 'none'; document.getElementById('2408.06588v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.06584">arXiv:2408.06584</a> <span> [<a href="https://arxiv.org/pdf/2408.06584">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Fast Transceiver Design for RIS-Assisted MIMO mmWave Wireless Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Jing%2C+H">Haiyue Jing</a>, <a href="/search/eess?searchtype=author&query=Cheng%2C+W">Wenchi Cheng</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xiang-Gen Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.06584v1-abstract-short" style="display: inline;"> Due to high bandwidth and small antenna size, millimeter-wave (mmWave) integrated line-of-sight (LOS) multiple-input-multiple-output (MIMO) systems have attracted much attention. 
Reconfigurable intelligent surfaces (RISs), which have the potential to change the characteristics of incident electromagnetic waves with low power cost, can improve the performance or the MIMO mmWave wireless communicati… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.06584v1-abstract-full').style.display = 'inline'; document.getElementById('2408.06584v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.06584v1-abstract-full" style="display: none;"> Due to high bandwidth and small antenna size, millimeter-wave (mmWave) integrated line-of-sight (LOS) multiple-input-multiple-output (MIMO) systems have attracted much attention. Reconfigurable intelligent surfaces (RISs), which have the potential to change the characteristics of incident electromagnetic waves with low power cost, can improve the performance or the MIMO mmWave wireless communications. Uniform circular array (UCA) is an effective antenna structure with low complexity transceiver. In this paper, UCA based RIS-assisted MIMO mmWave wireless communications with transmit UCA, the RIS UCAs, and receive UCA are investigated. Since the rotation angles between the transceiver make the channel matrix noncirculant, an algorithm is developed to derive the ranges of the rotation angles based on an acceptable error and reduce the impact of rotation angles on channel matrix. Then, we propose a low-complexity precoding scheme at the transmitter, phase designs at the RIS UCAs, and a phase compensation scheme at the receiver, which can convert the channel matrix into an equivalent circulant channel matrix with a small error. Then, a fast symbol-wise maximum likelihood (ML) detection scheme is proposed to recover the signals with low computational complexity. Simulation results are presented to illustrate the theory. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.06584v1-abstract-full').style.display = 'none'; document.getElementById('2408.06584v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.19503">arXiv:2407.19503</a> <span> [<a href="https://arxiv.org/pdf/2407.19503">pdf</a>, <a href="https://arxiv.org/ps/2407.19503">ps</a>, <a href="https://arxiv.org/format/2407.19503">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Discrete Spectrum Analysis of Vector OFDM Signals </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xiang-Gen Xia</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+W">Wei Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.19503v1-abstract-short" style="display: inline;"> Vector OFDM (VOFDM) is equivalent to OTFS and is good for time-varying channels. However, due to its vector form, its signal spectrum is not as clear as that of the conventional OFDM. In this paper, we study the discrete spectrum of discrete VOFDM signals. 
We obtain a linear relationship between a vector of information symbols and a vector of the same size of components evenly distributed in the d… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.19503v1-abstract-full').style.display = 'inline'; document.getElementById('2407.19503v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.19503v1-abstract-full" style="display: none;"> Vector OFDM (VOFDM) is equivalent to OTFS and is good for time-varying channels. However, due to its vector form, its signal spectrum is not as clear as that of the conventional OFDM. In this paper, we study the discrete spectrum of discrete VOFDM signals. We obtain a linear relationship between a vector of information symbols and a vector of the same size of components evenly distributed in the discrete VOFDM signal spectrum, and show that if a vector of information symbols is set to 0, then a corresponding vector of the same size of the discrete VOFDM signal spectrum is 0 as well, where the components of the 0 vector are not together but evenly distributed in the spectrum. With the linear relationship, the information symbol vectors can be locally precoded so that any of the discrete spectrum of VOFDM signals can be set to 0, similar to that of the conventional OFDM signals. These results are verified by simulations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.19503v1-abstract-full').style.display = 'none'; document.getElementById('2407.19503v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.17882">arXiv:2407.17882</a> <span> [<a href="https://arxiv.org/pdf/2407.17882">pdf</a>, <a href="https://arxiv.org/format/2407.17882">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Artificial Immunofluorescence in a Flash: Rapid Synthetic Imaging from Brightfield Through Residual Diffusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xing%2C+X">Xiaodan Xing</a>, <a href="/search/eess?searchtype=author&query=Tang%2C+C">Chunling Tang</a>, <a href="/search/eess?searchtype=author&query=Murdoch%2C+S">Siofra Murdoch</a>, <a href="/search/eess?searchtype=author&query=Papanastasiou%2C+G">Giorgos Papanastasiou</a>, <a href="/search/eess?searchtype=author&query=Guo%2C+Y">Yunzhe Guo</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+X">Xianglu Xiao</a>, <a href="/search/eess?searchtype=author&query=Cross-Zamirski%2C+J">Jan Cross-Zamirski</a>, <a href="/search/eess?searchtype=author&query=Sch%C3%B6nlieb%2C+C">Carola-Bibiane Schönlieb</a>, <a href="/search/eess?searchtype=author&query=Liang%2C+K+X">Kristina Xiao Liang</a>, <a href="/search/eess?searchtype=author&query=Niu%2C+Z">Zhangming Niu</a>, <a href="/search/eess?searchtype=author&query=Fang%2C+E+F">Evandro Fei Fang</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yinhai Wang</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+G">Guang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.17882v1-abstract-short" style="display: inline;"> Immunofluorescent (IF) imaging is crucial for visualizing 
biomarker expressions, cell morphology and assessing the effects of drug treatments on sub-cellular components. IF imaging needs extra staining process and often requiring cell fixation, therefore it may also introduce artefacts and alter endogenous cell morphology. Some IF stains are expensive or not readily available hence hindering exp… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.17882v1-abstract-full').style.display = 'inline'; document.getElementById('2407.17882v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.17882v1-abstract-full" style="display: none;"> Immunofluorescent (IF) imaging is crucial for visualizing biomarker expressions, cell morphology and assessing the effects of drug treatments on sub-cellular components. IF imaging needs extra staining process and often requiring cell fixation, therefore it may also introduce artefacts and alter endogenous cell morphology. Some IF stains are expensive or not readily available hence hindering experiments. Recent diffusion models, which synthesise high-fidelity IF images from easy-to-acquire brightfield (BF) images, offer a promising solution but are hindered by training instability and slow inference times due to the noise diffusion process. This paper presents a novel method for the conditional synthesis of IF images directly from BF images along with cell segmentation masks. Our approach employs a Residual Diffusion process that enhances stability and significantly reduces inference time. We performed a critical evaluation against other image-to-image synthesis models, including UNets, GANs, and advanced diffusion models. 
Our model demonstrates significant improvements in image quality (p&lt;0.05 in MSE, PSNR, and SSIM), inference speed (26 times faster than competing diffusion models), and accurate segmentation results for both nuclei and cell bodies (0.77 and 0.63 mean IOU for nuclei and cell true positives, respectively). This paper is a substantial advancement in the field, providing robust and efficient tools for cell image analysis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.17882v1-abstract-full').style.display = 'none'; document.getElementById('2407.17882v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.10848">arXiv:2407.10848</a> <span> [<a href="https://arxiv.org/pdf/2407.10848">pdf</a>, <a href="https://arxiv.org/format/2407.10848">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TWC.2024.3429495">10.1109/TWC.2024.3429495 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> On the Spectral Efficiency of Multi-user Holographic MIMO Uplink Transmission </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/eess?searchtype=author&query=Qian%2C+M">Mengyu Qian</a>, <a href="/search/eess?searchtype=author&query=You%2C+L">Li You</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xiang-Gen Xia</a>, <a href="/search/eess?searchtype=author&query=Gao%2C+X">Xiqi Gao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.10848v1-abstract-short" style="display: inline;"> With antenna spacing much less than half a wavelength in confined space, holographic multiple-input multiple-output (HMIMO) technology presents a promising frontier in next-generation mobile communication. We delve into the research of the multi-user uplink transmission with both the base station and the users equipped with holographic planar arrays. To begin, we construct an HMIMO channel model u… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.10848v1-abstract-full').style.display = 'inline'; document.getElementById('2407.10848v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.10848v1-abstract-full" style="display: none;"> With antenna spacing much less than half a wavelength in confined space, holographic multiple-input multiple-output (HMIMO) technology presents a promising frontier in next-generation mobile communication. We delve into the research of the multi-user uplink transmission with both the base station and the users equipped with holographic planar arrays. To begin, we construct an HMIMO channel model utilizing electromagnetic field equations, accompanied by a colored noise model that accounts for both electromagnetic interference and hardware noise. Since this model is continuous, we approximate it within a finite-dimensional space spanned by Fourier space series, which can be defined as the communication mode functions. 
We show that this channel model samples Green's function in the wavenumber domain in different communication modes. Subsequently, we tackle the challenging task of maximizing the spectral efficiency (SE) of the system, which involves optimizing the continuous current density function (CDF) for each user. Using the aforementioned approximation model, we transform the optimization variables into expansion coefficients of the CDFs on a finite-dimensional space, for which we propose an iterative water-filling algorithm. Simulation results illustrate the efficacy of the proposed algorithm in enhancing the system SE and show the influence of the colored noise and the system parameters on the SE. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.10848v1-abstract-full').style.display = 'none'; document.getElementById('2407.10848v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 7 figures, to appear in IEEE Transactions on Wireless Communications</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Wireless Communications, vol. 23, no. 10, pp. 15421-15434, Oct. 
2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.09507">arXiv:2407.09507</a> <span> [<a href="https://arxiv.org/pdf/2407.09507">pdf</a>, <a href="https://arxiv.org/format/2407.09507">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Can Generative AI Replace Immunofluorescent Staining Processes? A Comparison Study of Synthetically Generated CellPainting Images from Brightfield </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xing%2C+X">Xiaodan Xing</a>, <a href="/search/eess?searchtype=author&query=Murdoch%2C+S">Siofra Murdoch</a>, <a href="/search/eess?searchtype=author&query=Tang%2C+C">Chunling Tang</a>, <a href="/search/eess?searchtype=author&query=Papanastasiou%2C+G">Giorgos Papanastasiou</a>, <a href="/search/eess?searchtype=author&query=Cross-Zamirski%2C+J">Jan Cross-Zamirski</a>, <a href="/search/eess?searchtype=author&query=Guo%2C+Y">Yunzhe Guo</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+X">Xianglu Xiao</a>, <a href="/search/eess?searchtype=author&query=Sch%C3%B6nlieb%2C+C">Carola-Bibiane Schönlieb</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yinhai Wang</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+G">Guang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.09507v2-abstract-short" style="display: inline;"> Cell imaging assays utilizing fluorescence stains are essential for observing sub-cellular organelles and their responses to perturbations. 
Immunofluorescent staining process is routinely in labs, however the recent innovations in generative AI is challenging the idea of IF staining are required. This is especially true when the availability and cost of specific fluorescence dyes is a problem to s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.09507v2-abstract-full').style.display = 'inline'; document.getElementById('2407.09507v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.09507v2-abstract-full" style="display: none;"> Cell imaging assays utilizing fluorescence stains are essential for observing sub-cellular organelles and their responses to perturbations. Immunofluorescent staining process is routinely in labs, however the recent innovations in generative AI is challenging the idea of IF staining are required. This is especially true when the availability and cost of specific fluorescence dyes is a problem to some labs. Furthermore, staining process takes time and leads to inter-intra technician and hinders downstream image and data analysis, and the reusability of image data for other projects. Recent studies showed the use of generated synthetic immunofluorescence (IF) images from brightfield (BF) images using generative AI algorithms in the literature. Therefore, in this study, we benchmark and compare five models from three types of IF generation backbones, CNN, GAN, and diffusion models, using a publicly available dataset. This paper not only serves as a comparative study to determine the best-performing model but also proposes a comprehensive analysis pipeline for evaluating the efficacy of generators in IF image synthesis. We highlighted the potential of deep learning-based generators for IF image synthesis, while also discussed potential issues and future research directions. 
Although generative AI shows promise in simplifying cell phenotyping using only BF images with IF staining, further research and validations are needed to address the key challenges of model generalisability, batch effects, feature relevance and computational costs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.09507v2-abstract-full').style.display = 'none'; document.getElementById('2407.09507v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.05259">arXiv:2407.05259</a> <span> [<a href="https://arxiv.org/pdf/2407.05259">pdf</a>, <a href="https://arxiv.org/format/2407.05259">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Multi-scale Conditional Generative Modeling for Microscopic Image Restoration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Huang%2C+L">Luzhe Huang</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+X">Xiongye Xiao</a>, <a href="/search/eess?searchtype=author&query=Li%2C+S">Shixuan 
Li</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+J">Jiawen Sun</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+Y">Yi Huang</a>, <a href="/search/eess?searchtype=author&query=Ozcan%2C+A">Aydogan Ozcan</a>, <a href="/search/eess?searchtype=author&query=Bogdan%2C+P">Paul Bogdan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.05259v1-abstract-short" style="display: inline;"> The advance of diffusion-based generative models in recent years has revolutionized state-of-the-art (SOTA) techniques in a wide variety of image analysis and synthesis tasks, whereas their adaptation on image restoration, particularly within computational microscopy remains theoretically and empirically underexplored. In this research, we introduce a multi-scale generative model that enhances con… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.05259v1-abstract-full').style.display = 'inline'; document.getElementById('2407.05259v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.05259v1-abstract-full" style="display: none;"> The advance of diffusion-based generative models in recent years has revolutionized state-of-the-art (SOTA) techniques in a wide variety of image analysis and synthesis tasks, whereas their adaptation on image restoration, particularly within computational microscopy remains theoretically and empirically underexplored. In this research, we introduce a multi-scale generative model that enhances conditional image restoration through a novel exploitation of the Brownian Bridge process within wavelet domain. 
By initiating the Brownian Bridge diffusion process specifically at the lowest-frequency subband and applying generative adversarial networks at subsequent multi-scale high-frequency subbands in the wavelet domain, our method provides significant acceleration during training and sampling while sustaining a high image generation quality and diversity on par with SOTA diffusion models. Experimental results on various computational microscopy and imaging tasks confirm our method's robust performance and its considerable reduction in its sampling steps and time. This pioneering technique offers an efficient image restoration framework that harmonizes efficiency with quality, signifying a major stride in incorporating cutting-edge generative models into computational microscopy workflows. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.05259v1-abstract-full').style.display = 'none'; document.getElementById('2407.05259v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.01083">arXiv:2407.01083</a> <span> [<a href="https://arxiv.org/pdf/2407.01083">pdf</a>, <a href="https://arxiv.org/ps/2407.01083">ps</a>, <a href="https://arxiv.org/format/2407.01083">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> A Note On the Clark Conjecture On Time-Warped Bandlimited Signals </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xiang-Gen Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.01083v1-abstract-short" style="display: inline;"> In this note, a result of a previous paper on the Clark conjecture on time-warped bandlimited signals is extended to a more general class of the time warping functions, which includes most of the common functions in practice. </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.01083v1-abstract-full" style="display: none;"> In this note, a result of a previous paper on the Clark conjecture on time-warped bandlimited signals is extended to a more general class of the time warping functions, which includes most of the common functions in practice. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.01083v1-abstract-full').style.display = 'none'; document.getElementById('2407.01083v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.09822">arXiv:2406.09822</a> <span> [<a href="https://arxiv.org/pdf/2406.09822">pdf</a>, <a href="https://arxiv.org/format/2406.09822">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TWC.2024.3509382">10.1109/TWC.2024.3509382 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> An I2I Inpainting Approach for Efficient Channel Knowledge Map Construction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Jin%2C+Z">Zhenzhou Jin</a>, <a href="/search/eess?searchtype=author&query=You%2C+L">Li You</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+J">Jue Wang</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xiang-Gen Xia</a>, <a href="/search/eess?searchtype=author&query=Gao%2C+X">Xiqi Gao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.09822v1-abstract-short" style="display: inline;"> Channel 
knowledge map (CKM) has received widespread attention as an emerging enabling technology for environment-aware wireless communications. It involves the construction of databases containing location-specific channel knowledge, which are then leveraged to facilitate channel state information (CSI) acquisition and transceiver design. In this context, a fundamental challenge lies in efficientl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.09822v1-abstract-full').style.display = 'inline'; document.getElementById('2406.09822v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.09822v1-abstract-full" style="display: none;"> Channel knowledge map (CKM) has received widespread attention as an emerging enabling technology for environment-aware wireless communications. It involves the construction of databases containing location-specific channel knowledge, which are then leveraged to facilitate channel state information (CSI) acquisition and transceiver design. In this context, a fundamental challenge lies in efficiently constructing the CKM based on a given wireless propagation environment. Most existing methods are based on stochastic modeling and sequence prediction, which do not fully exploit the inherent physical characteristics of the propagation environment, resulting in low accuracy and high computational complexity. To address these limitations, we propose a Laplacian pyramid (LP)-based CKM construction scheme to predict the channel knowledge at arbitrary locations in a targeted area. Specifically, we first view the channel knowledge as a 2-D image and transform the CKM construction problem into an image-to-image (I2I) inpainting task, which predicts the channel knowledge at a specific location by recovering the corresponding pixel value in the image matrix. 
Then, inspired by the reversible and closed-form structure of the LP, we show its natural suitability for our task in designing a fast I2I mapping network. For different frequency components of LP decomposition, we design tailored networks accordingly. Besides, to encode the global structural information of the propagation environment, we introduce self-attention and cross-covariance attention mechanisms in different layers, respectively. Finally, experimental results show that the proposed scheme outperforms the benchmark, achieving higher reconstruction accuracy while with lower computational complexity. Moreover, the proposed approach has a strong generalization ability and can be implemented in different wireless communication scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.09822v1-abstract-full').style.display = 'none'; document.getElementById('2406.09822v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 11 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Wireless Communications, vol. 24, no. 2, pp. 1415-1429, Feb. 
2025 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.07498">arXiv:2406.07498</a> <span> [<a href="https://arxiv.org/pdf/2406.07498">pdf</a>, <a href="https://arxiv.org/format/2406.07498">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> RaD-Net 2: A causal two-stage repairing and denoising speech enhancement network with knowledge distillation and complex axial self-attention </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Liu%2C+M">Mingshuai Liu</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Z">Zhuangqi Chen</a>, <a href="/search/eess?searchtype=author&query=Yan%2C+X">Xiaopeng Yan</a>, <a href="/search/eess?searchtype=author&query=Lv%2C+Y">Yuanjun Lv</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xianjun Xia</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+C">Chuanzeng Huang</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+Y">Yijian Xiao</a>, <a href="/search/eess?searchtype=author&query=Xie%2C+L">Lei Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.07498v1-abstract-short" style="display: inline;"> In real-time speech communication systems, speech signals are often degraded by multiple distortions. Recently, a two-stage Repair-and-Denoising network (RaD-Net) was proposed with superior speech quality improvement in the ICASSP 2024 Speech Signal Improvement (SSI) Challenge. 
However, failure to use future information and constraint receptive field of convolution layers limit the system's perfor… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.07498v1-abstract-full').style.display = 'inline'; document.getElementById('2406.07498v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.07498v1-abstract-full" style="display: none;"> In real-time speech communication systems, speech signals are often degraded by multiple distortions. Recently, a two-stage Repair-and-Denoising network (RaD-Net) was proposed with superior speech quality improvement in the ICASSP 2024 Speech Signal Improvement (SSI) Challenge. However, failure to use future information and constraint receptive field of convolution layers limit the system's performance. To mitigate these problems, we extend RaD-Net to its upgraded version, RaD-Net 2. Specifically, a causality-based knowledge distillation is introduced in the first stage to use future information in a causal way. We use the non-causal repairing network as the teacher to improve the performance of the causal repairing network. In addition, in the second stage, complex axial self-attention is applied in the denoising network's complex feature encoder/decoder. Experimental results on the ICASSP 2024 SSI Challenge blind test set show that RaD-Net 2 brings 0.10 OVRL DNSMOS improvement compared to RaD-Net. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.07498v1-abstract-full').style.display = 'none'; document.getElementById('2406.07498v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by Interspeech 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.05961">arXiv:2406.05961</a> <span> [<a href="https://arxiv.org/pdf/2406.05961">pdf</a>, <a href="https://arxiv.org/format/2406.05961">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> BS-PLCNet 2: Two-stage Band-split Packet Loss Concealment Network with Intra-model Knowledge Distillation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhang%2C+Z">Zihan Zhang</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xianjun Xia</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+C">Chuanzeng Huang</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+Y">Yijian Xiao</a>, <a href="/search/eess?searchtype=author&query=Xie%2C+L">Lei Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.05961v1-abstract-short" style="display: inline;"> Audio packet loss is an inevitable problem in real-time speech communication. A band-split packet loss concealment network (BS-PLCNet) targeting full-band signals was recently proposed. Although it performs superiorly in the ICASSP 2024 PLC Challenge, BS-PLCNet is a large model with high computational complexity of 8.95G FLOPS. 
This paper presents its updated version, BS-PLCNet 2, to reduce comput… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.05961v1-abstract-full').style.display = 'inline'; document.getElementById('2406.05961v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.05961v1-abstract-full" style="display: none;"> Audio packet loss is an inevitable problem in real-time speech communication. A band-split packet loss concealment network (BS-PLCNet) targeting full-band signals was recently proposed. Although it performs superiorly in the ICASSP 2024 PLC Challenge, BS-PLCNet is a large model with high computational complexity of 8.95G FLOPS. This paper presents its updated version, BS-PLCNet 2, to reduce computational complexity and improve performance further. Specifically, to compensate for the missing future information, in the wide-band module, we design a dual-path encoder structure (with non-causal and causal path) and leverage an intra-model knowledge distillation strategy to distill the future information from the non-causal teacher to the casual student. Moreover, we introduce a lightweight post-processing module after packet loss restoration to recover speech distortions and remove residual noise in the audio signal. With only 40% of original parameters in BS-PLCNet, BS-PLCNet 2 brings 0.18 PLCMOS improvement on the ICASSP 2024 PLC challenge blind set, achieving state-of-the-art performance on this dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.05961v1-abstract-full').style.display = 'none'; document.getElementById('2406.05961v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by Interspeech 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.04586">arXiv:2406.04586</a> <span> [<a href="https://arxiv.org/pdf/2406.04586">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> A Simple Channel Independent Beamforming Scheme With Parallel Uniform Circular Array </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Jing%2C+H">Haiyue Jing</a>, <a href="/search/eess?searchtype=author&query=Cheng%2C+W">Wenchi Cheng</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xiang-Gen Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.04586v1-abstract-short" style="display: inline;"> In this letter, we consider a uniform circular array (UCA)-based line-of-sight multiple-input-multiple-output system, where the transmit and receive UCAs are parallel but non-coaxial with each other. We propose a simple channel-independent beamforming scheme with fast symbol-wise maximum likelihood detection. </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.04586v1-abstract-full" style="display: none;"> In this letter, we consider a uniform circular array (UCA)-based line-of-sight multiple-input-multiple-output system, where the transmit and receive UCAs are parallel but non-coaxial with each other. We propose a simple channel-independent beamforming scheme with fast symbol-wise maximum likelihood detection. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.04586v1-abstract-full').style.display = 'none'; document.getElementById('2406.04586v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been published in IEEE Communications Letters. arXiv admin note: substantial text overlap with arXiv:1804.06621</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.11883">arXiv:2405.11883</a> <span> [<a href="https://arxiv.org/pdf/2405.11883">pdf</a>, <a href="https://arxiv.org/format/2405.11883">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Asynchronous MIMO-OFDM Massive Unsourced Random Access with Codeword Collisions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+T">Tianya Li</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Y">Yongpeng Wu</a>, <a href="/search/eess?searchtype=author&query=Gao%2C+J">Junyuan Gao</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+W">Wenjun Zhang</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xiang-Gen Xia</a>, <a href="/search/eess?searchtype=author&query=Ng%2C+D+W+K">Derrick Wing Kwan Ng</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+C">Chengshan Xiao</a> 
</p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.11883v2-abstract-short" style="display: inline;"> This paper investigates asynchronous multiple-input multiple-output (MIMO) massive unsourced random access (URA) in an orthogonal frequency division multiplexing (OFDM) system over frequency-selective fading channels, with the presence of both timing and carrier frequency offsets (TO and CFO) and non-negligible codeword collisions. The proposed coding framework segregates the data into two compone… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.11883v2-abstract-full').style.display = 'inline'; document.getElementById('2405.11883v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.11883v2-abstract-full" style="display: none;"> This paper investigates asynchronous multiple-input multiple-output (MIMO) massive unsourced random access (URA) in an orthogonal frequency division multiplexing (OFDM) system over frequency-selective fading channels, with the presence of both timing and carrier frequency offsets (TO and CFO) and non-negligible codeword collisions. The proposed coding framework segregates the data into two components, namely, preamble and coding parts, with the former being tree-coded and the latter LDPC-coded. By leveraging the dual sparsity of the equivalent channel across both codeword and delay domains (CD and DD), we develop a message-passing-based sparse Bayesian learning algorithm, combined with belief propagation and mean field, to iteratively estimate DD channel responses, TO, and delay profiles. Furthermore, by jointly leveraging the observations among multiple slots, we establish a novel graph-based algorithm to iteratively separate the superimposed channels and compensate for the phase rotations. 
Additionally, the proposed algorithm is applied to the flat fading scenario to estimate both TO and CFO, where the channel and offset estimation is enhanced by leveraging the geometric characteristics of the signal constellation. Extensive simulations reveal that the proposed algorithm achieves superior performance and substantial complexity reduction in both channel and offset estimation compared to the codebook enlarging-based counterparts, and enhanced data recovery performances compared to state-of-the-art URA schemes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.11883v2-abstract-full').style.display = 'none'; document.getElementById('2405.11883v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been accepted by the IEEE Transactions on Wireless Communications</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.09734">arXiv:2404.09734</a> <span> [<a href="https://arxiv.org/pdf/2404.09734">pdf</a>, <a href="https://arxiv.org/format/2404.09734">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Weighted Sum-Rate Maximization for Movable Antenna-Enhanced Wireless Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Feng%2C+B">Biqian Feng</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Y">Yongpeng Wu</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xiang-Gen Xia</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+C">Chengshan Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.09734v1-abstract-short" style="display: inline;"> This letter investigates the weighted sum rate maximization problem in movable antenna (MA)-enhanced systems. To reduce the computational complexity, we transform it into a more tractable weighted minimum mean square error (WMMSE) problem well-suited for MA. We then adopt the WMMSE algorithm and majorization-minimization algorithm to optimize the beamforming and antenna positions, respectively. 
Mo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.09734v1-abstract-full').style.display = 'inline'; document.getElementById('2404.09734v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.09734v1-abstract-full" style="display: none;"> This letter investigates the weighted sum rate maximization problem in movable antenna (MA)-enhanced systems. To reduce the computational complexity, we transform it into a more tractable weighted minimum mean square error (WMMSE) problem well-suited for MA. We then adopt the WMMSE algorithm and majorization-minimization algorithm to optimize the beamforming and antenna positions, respectively. Moreover, we propose a planar movement mode, which constrains each MA to a specified area, we obtain a low-complexity closed-form solution. Numerical results demonstrate that the MA-enhanced system outperforms the conventional system. Besides, the computation time for the planar movement mode is reduced by approximately 30\% at a little performance expense. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.09734v1-abstract-full').style.display = 'none'; document.getElementById('2404.09734v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by IEEE Wireless Communications Letters</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.07425">arXiv:2404.07425</a> <span> [<a href="https://arxiv.org/pdf/2404.07425">pdf</a>, <a href="https://arxiv.org/ps/2404.07425">ps</a>, <a href="https://arxiv.org/format/2404.07425">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Precoder Design for User-Centric Network Massive MIMO with Matrix Manifold Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Sun%2C+R">Rui Sun</a>, <a href="/search/eess?searchtype=author&query=You%2C+L">Li You</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+A">An-An Lu</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+C">Chen Sun</a>, <a href="/search/eess?searchtype=author&query=Gao%2C+X">Xiqi Gao</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xiang-Gen Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.07425v1-abstract-short" style="display: inline;"> In this paper, we investigate the precoder design for user-centric network (UCN) massive multiple-input multiple-output (mMIMO) downlink with matrix manifold optimization. 
In UCN mMIMO systems, each user terminal (UT) is served by a subset of base stations (BSs) instead of all the BSs, facilitating the implementation of the system and lowering the dimension of the precoders to be designed. By prov… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.07425v1-abstract-full').style.display = 'inline'; document.getElementById('2404.07425v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.07425v1-abstract-full" style="display: none;"> In this paper, we investigate the precoder design for user-centric network (UCN) massive multiple-input multiple-output (mMIMO) downlink with matrix manifold optimization. In UCN mMIMO systems, each user terminal (UT) is served by a subset of base stations (BSs) instead of all the BSs, facilitating the implementation of the system and lowering the dimension of the precoders to be designed. By proving that the precoder set satisfying the per-BS power constraints forms a Riemannian submanifold of a linear product manifold, we transform the constrained precoder design problem in Euclidean space to an unconstrained one on the Riemannian submanifold. Riemannian ingredients, including orthogonal projection, Riemannian gradient, retraction and vector transport, of the problem on the Riemannian submanifold are further derived, with which the Riemannian conjugate gradient (RCG) design method is proposed for solving the unconstrained problem. The proposed method avoids the inverses of large dimensional matrices, which is beneficial in practice. The complexity analyses show the high computational efficiency of RCG precoder design. Simulation results demonstrate the numerical superiority of the proposed precoder design and the high efficiency of the UCN mMIMO system. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.07425v1-abstract-full').style.display = 'none'; document.getElementById('2404.07425v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 9 figures, journal</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.07954">arXiv:2403.07954</a> <span> [<a href="https://arxiv.org/pdf/2403.07954">pdf</a>, <a href="https://arxiv.org/format/2403.07954">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Optimizing Polynomial Graph Filters: A Novel Adaptive Krylov Subspace Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Huang%2C+K">Keke Huang</a>, <a href="/search/eess?searchtype=author&query=Cao%2C+W">Wencai Cao</a>, <a href="/search/eess?searchtype=author&query=Ta%2C+H">Hoang Ta</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+X">Xiaokui Xiao</a>, <a href="/search/eess?searchtype=author&query=Li%C3%B2%2C+P">Pietro Liò</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.07954v2-abstract-short" style="display: inline;"> Graph Neural Networks (GNNs), known 
as spectral graph filters, find a wide range of applications in web networks. To bypass eigendecomposition, polynomial graph filters are proposed to approximate graph filters by leveraging various polynomial bases for filter training. However, no existing studies have explored the diverse polynomial graph filters from a unified perspective for optimization. In… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.07954v2-abstract-full').style.display = 'inline'; document.getElementById('2403.07954v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.07954v2-abstract-full" style="display: none;"> Graph Neural Networks (GNNs), known as spectral graph filters, find a wide range of applications in web networks. To bypass eigendecomposition, polynomial graph filters are proposed to approximate graph filters by leveraging various polynomial bases for filter training. However, no existing studies have explored the diverse polynomial graph filters from a unified perspective for optimization. In this paper, we first unify polynomial graph filters, as well as the optimal filters of identical degrees into the Krylov subspace of the same order, thus providing equivalent expressive power theoretically. Next, we investigate the asymptotic convergence property of polynomials from the unified Krylov subspace perspective, revealing their limited adaptability in graphs with varying heterophily degrees. Inspired by those facts, we design a novel adaptive Krylov subspace approach to optimize polynomial bases with provable controllability over the graph spectrum so as to adapt various heterophily graphs. Subsequently, we propose AdaptKry, an optimized polynomial graph filter utilizing bases from the adaptive Krylov subspaces. 
Meanwhile, in light of the diverse spectral properties of complex graphs, we extend AdaptKry by leveraging multiple adaptive Krylov bases without incurring extra training costs. As a consequence, extended AdaptKry is able to capture the intricate characteristics of graphs and provide insights into their inherent complexity. We conduct extensive experiments across a series of real-world datasets. The experimental results demonstrate the superior filtering capability of AdaptKry, as well as the optimized efficacy of the adaptive Krylov basis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.07954v2-abstract-full').style.display = 'none'; document.getElementById('2403.07954v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.01271">arXiv:2402.01271</a> <span> [<a href="https://arxiv.org/pdf/2402.01271">pdf</a>, <a href="https://arxiv.org/format/2402.01271">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> An Intra-BRNN and GB-RVQ Based END-TO-END Neural Audio Codec </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xu%2C+L">Linping Xu</a>, <a href="/search/eess?searchtype=author&query=Jiang%2C+J">Jiawei Jiang</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+D">Dejun Zhang</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xianjun Xia</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+L">Li Chen</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+Y">Yijian Xiao</a>, <a href="/search/eess?searchtype=author&query=Ding%2C+P">Piao Ding</a>, <a href="/search/eess?searchtype=author&query=Song%2C+S">Shenyi Song</a>, <a href="/search/eess?searchtype=author&query=Yin%2C+S">Sixing Yin</a>, <a href="/search/eess?searchtype=author&query=Sohel%2C+F">Ferdous Sohel</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.01271v1-abstract-short" style="display: inline;"> Recently, neural networks have proven to be effective in performing speech coding task at low bitrates. However, under-utilization of intra-frame correlations and the error of quantizer specifically degrade the reconstructed audio quality. 
To improve the coding quality, we present an end-to-end neural speech codec, namely CBRC (Convolutional and Bidirectional Recurrent neural Codec). An interleave… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.01271v1-abstract-full').style.display = 'inline'; document.getElementById('2402.01271v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.01271v1-abstract-full" style="display: none;"> Recently, neural networks have proven to be effective in performing speech coding task at low bitrates. However, under-utilization of intra-frame correlations and the error of quantizer specifically degrade the reconstructed audio quality. To improve the coding quality, we present an end-to-end neural speech codec, namely CBRC (Convolutional and Bidirectional Recurrent neural Codec). An interleaved structure using 1D-CNN and Intra-BRNN is designed to exploit the intra-frame correlations more efficiently. Furthermore, Group-wise and Beam-search Residual Vector Quantizer (GB-RVQ) is used to reduce the quantization noise. CBRC encodes audio every 20ms with no additional latency, which is suitable for real-time communication. Experimental results demonstrate the superiority of the proposed codec when comparing CBRC at 3kbps with Opus at 12kbps. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.01271v1-abstract-full').style.display = 'none'; document.getElementById('2402.01271v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">INTERSPEECH 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.13197">arXiv:2401.13197</a> <span> [<a href="https://arxiv.org/pdf/2401.13197">pdf</a>, <a href="https://arxiv.org/format/2401.13197">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Predicting Mitral Valve mTEER Surgery Outcomes Using Machine Learning and Deep Learning Techniques </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Vyas%2C+T">Tejas Vyas</a>, <a href="/search/eess?searchtype=author&query=Chowdhury%2C+M">Mohsena Chowdhury</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+X">Xiaojiao Xiao</a>, <a href="/search/eess?searchtype=author&query=Claeys%2C+M">Mathias Claeys</a>, <a href="/search/eess?searchtype=author&query=Ong%2C+G">Géraldine Ong</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+G">Guanghui Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.13197v1-abstract-short" style="display: inline;"> Mitral Transcatheter Edge-to-Edge Repair (mTEER) is a medical procedure utilized for the treatment of mitral valve disorders. However, predicting the outcome of the procedure poses a significant challenge. 
This paper makes the first attempt to harness classical machine learning (ML) and deep learning (DL) techniques for predicting mitral valve mTEER surgery outcomes. To achieve this, we compiled a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.13197v1-abstract-full').style.display = 'inline'; document.getElementById('2401.13197v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.13197v1-abstract-full" style="display: none;"> Mitral Transcatheter Edge-to-Edge Repair (mTEER) is a medical procedure utilized for the treatment of mitral valve disorders. However, predicting the outcome of the procedure poses a significant challenge. This paper makes the first attempt to harness classical machine learning (ML) and deep learning (DL) techniques for predicting mitral valve mTEER surgery outcomes. To achieve this, we compiled a dataset from 467 patients, encompassing labeled echocardiogram videos and patient reports containing Transesophageal Echocardiography (TEE) measurements detailing Mitral Valve Repair (MVR) treatment outcomes. Leveraging this dataset, we conducted a benchmark evaluation of six ML algorithms and two DL models. The results underscore the potential of ML and DL in predicting mTEER surgery outcomes, providing insight for future investigation and advancements in this domain. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.13197v1-abstract-full').style.display = 'none'; document.getElementById('2401.13197v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 1 figure</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.08887">arXiv:2401.08887</a> <span> [<a href="https://arxiv.org/pdf/2401.08887">pdf</a>, <a href="https://arxiv.org/ps/2401.08887">ps</a>, <a href="https://arxiv.org/format/2401.08887">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> NOTSOFAR-1 Challenge: New Datasets, Baseline, and Tasks for Distant Meeting Transcription </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Vinnikov%2C+A">Alon Vinnikov</a>, <a href="/search/eess?searchtype=author&query=Ivry%2C+A">Amir Ivry</a>, <a href="/search/eess?searchtype=author&query=Hurvitz%2C+A">Aviv Hurvitz</a>, <a href="/search/eess?searchtype=author&query=Abramovski%2C+I">Igor Abramovski</a>, <a href="/search/eess?searchtype=author&query=Koubi%2C+S">Sharon Koubi</a>, <a href="/search/eess?searchtype=author&query=Gurvich%2C+I">Ilya Gurvich</a>, <a href="/search/eess?searchtype=author&query=Pe%60er%2C+S">Shai Pe`er</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+X">Xiong Xiao</a>, <a href="/search/eess?searchtype=author&query=Elizalde%2C+B+M">Benjamin Martinez Elizalde</a>, <a href="/search/eess?searchtype=author&query=Kanda%2C+N">Naoyuki Kanda</a>, <a 
href="/search/eess?searchtype=author&query=Wang%2C+X">Xiaofei Wang</a>, <a href="/search/eess?searchtype=author&query=Shaer%2C+S">Shalev Shaer</a>, <a href="/search/eess?searchtype=author&query=Yagev%2C+S">Stav Yagev</a>, <a href="/search/eess?searchtype=author&query=Asher%2C+Y">Yossi Asher</a>, <a href="/search/eess?searchtype=author&query=Sivasankaran%2C+S">Sunit Sivasankaran</a>, <a href="/search/eess?searchtype=author&query=Gong%2C+Y">Yifan Gong</a>, <a href="/search/eess?searchtype=author&query=Tang%2C+M">Min Tang</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+H">Huaming Wang</a>, <a href="/search/eess?searchtype=author&query=Krupka%2C+E">Eyal Krupka</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.08887v1-abstract-short" style="display: inline;"> We introduce the first Natural Office Talkers in Settings of Far-field Audio Recordings (``NOTSOFAR-1'') Challenge alongside datasets and baseline system. The challenge focuses on distant speaker diarization and automatic speech recognition (DASR) in far-field meeting scenarios, with single-channel and known-geometry multi-channel tracks, and serves as a launch platform for two new datasets: First… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.08887v1-abstract-full').style.display = 'inline'; document.getElementById('2401.08887v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.08887v1-abstract-full" style="display: none;"> We introduce the first Natural Office Talkers in Settings of Far-field Audio Recordings (``NOTSOFAR-1'') Challenge alongside datasets and baseline system. 
The challenge focuses on distant speaker diarization and automatic speech recognition (DASR) in far-field meeting scenarios, with single-channel and known-geometry multi-channel tracks, and serves as a launch platform for two new datasets: First, a benchmarking dataset of 315 meetings, averaging 6 minutes each, capturing a broad spectrum of real-world acoustic conditions and conversational dynamics. It is recorded across 30 conference rooms, featuring 4-8 attendees and a total of 35 unique speakers. Second, a 1000-hour simulated training dataset, synthesized with enhanced authenticity for real-world generalization, incorporating 15,000 real acoustic transfer functions. The tasks focus on single-device DASR, where multi-channel devices always share the same known geometry. This is aligned with common setups in actual conference rooms, and avoids technical complexities associated with multi-device tasks. It also allows for the development of geometry-specific solutions. The NOTSOFAR-1 Challenge aims to advance research in the field of distant conversational speech recognition, providing key resources to unlock the potential of data-driven methods, which we believe are currently constrained by the absence of comprehensive high-quality training and benchmarking datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.08887v1-abstract-full').style.display = 'none'; document.getElementById('2401.08887v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.04389">arXiv:2401.04389</a> <span> [<a href="https://arxiv.org/pdf/2401.04389">pdf</a>, <a href="https://arxiv.org/format/2401.04389">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> RaD-Net: A Repairing and Denoising Network for Speech Signal Improvement </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Liu%2C+M">Mingshuai Liu</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Z">Zhuangqi Chen</a>, <a href="/search/eess?searchtype=author&query=Yan%2C+X">Xiaopeng Yan</a>, <a href="/search/eess?searchtype=author&query=Lv%2C+Y">Yuanjun Lv</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xianjun Xia</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+C">Chuanzeng Huang</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+Y">Yijian Xiao</a>, <a href="/search/eess?searchtype=author&query=Xie%2C+L">Lei Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.04389v1-abstract-short" style="display: inline;"> This paper introduces our repairing and denoising network (RaD-Net) for the ICASSP 2024 Speech Signal Improvement (SSI) Challenge. We extend our previous framework based on a two-stage network and propose an upgraded model. 
Specifically, we replace the repairing network with COM-Net from TEA-PSE. In addition, multi-resolution discriminators and multi-band discriminators are adopted in the training… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.04389v1-abstract-full').style.display = 'inline'; document.getElementById('2401.04389v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.04389v1-abstract-full" style="display: none;"> This paper introduces our repairing and denoising network (RaD-Net) for the ICASSP 2024 Speech Signal Improvement (SSI) Challenge. We extend our previous framework based on a two-stage network and propose an upgraded model. Specifically, we replace the repairing network with COM-Net from TEA-PSE. In addition, multi-resolution discriminators and multi-band discriminators are adopted in the training stage. Finally, we use a three-step training strategy to optimize our model. We submit two models with different sets of parameters to meet the RTF requirement of the two tracks. According to the official results, the proposed systems rank 2nd in track 1 and 3rd in track 2. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.04389v1-abstract-full').style.display = 'none'; document.getElementById('2401.04389v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">submitted to ICASSP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.03687">arXiv:2401.03687</a> <span> [<a href="https://arxiv.org/pdf/2401.03687">pdf</a>, <a href="https://arxiv.org/format/2401.03687">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> BS-PLCNet: Band-split Packet Loss Concealment Network with Multi-task Learning Framework and Multi-discriminators </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhang%2C+Z">Zihan Zhang</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+J">Jiayao Sun</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xianjun Xia</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+C">Chuanzeng Huang</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+Y">Yijian Xiao</a>, <a href="/search/eess?searchtype=author&query=Xie%2C+L">Lei Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.03687v1-abstract-short" style="display: inline;"> Packet loss is a common and unavoidable problem in voice over internet phone (VoIP) systems. To deal with the problem, we propose a band-split packet loss concealment network (BS-PLCNet). Specifically, we split the full-band signal into wide-band (0-8kHz) and high-band (8-24kHz). 
The wide-band signals are processed by a gated convolutional recurrent network (GCRN), while the high-band counterpart… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.03687v1-abstract-full').style.display = 'inline'; document.getElementById('2401.03687v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.03687v1-abstract-full" style="display: none;"> Packet loss is a common and unavoidable problem in voice over internet phone (VoIP) systems. To deal with the problem, we propose a band-split packet loss concealment network (BS-PLCNet). Specifically, we split the full-band signal into wide-band (0-8kHz) and high-band (8-24kHz). The wide-band signals are processed by a gated convolutional recurrent network (GCRN), while the high-band counterpart is processed by a simple GRU network. To ensure high speech quality and automatic speech recognition (ASR) compatibility, multi-task learning (MTL) framework including fundamental frequency (f0) prediction, linguistic awareness, and multi-discriminators are used. The proposed approach tied for 1st place in the ICASSP 2024 PLC Challenge. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.03687v1-abstract-full').style.display = 'none'; document.getElementById('2401.03687v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">submitted to ICASSP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.00413">arXiv:2401.00413</a> <span> [<a href="https://arxiv.org/pdf/2401.00413">pdf</a>, <a href="https://arxiv.org/format/2401.00413">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Real-Time FJ/MAC PDE Solvers via Tensorized, Back-Propagation-Free Optical PINN Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhao%2C+Y">Yequan Zhao</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+X">Xian Xiao</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+X">Xinling Yu</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Z">Ziyue Liu</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Z">Zhixiong Chen</a>, <a href="/search/eess?searchtype=author&query=Kurczveil%2C+G">Geza Kurczveil</a>, <a href="/search/eess?searchtype=author&query=Beausoleil%2C+R+G">Raymond G. 
Beausoleil</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Z">Zheng Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.00413v2-abstract-short" style="display: inline;"> Solving partial differential equations (PDEs) numerically often requires huge computing time, energy cost, and hardware resources in practical applications. This has limited their applications in many scenarios (e.g., autonomous systems, supersonic flows) that have a limited energy budget and require near real-time response. Leveraging optical computing, this paper develops an on-chip training fra… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.00413v2-abstract-full').style.display = 'inline'; document.getElementById('2401.00413v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.00413v2-abstract-full" style="display: none;"> Solving partial differential equations (PDEs) numerically often requires huge computing time, energy cost, and hardware resources in practical applications. This has limited their applications in many scenarios (e.g., autonomous systems, supersonic flows) that have a limited energy budget and require near real-time response. Leveraging optical computing, this paper develops an on-chip training framework for physics-informed neural networks (PINNs), aiming to solve high-dimensional PDEs with fJ/MAC photonic power consumption and ultra-low latency. Despite the ultra-high speed of optical neural networks, training a PINN on an optical chip is hard due to (1) the large size of photonic devices, and (2) the lack of scalable optical memory devices to store the intermediate results of back-propagation (BP). To enable realistic optical PINN training, this paper presents a scalable method to avoid the BP process. 
We also employ a tensor-compressed approach to improve the convergence and scalability of our optical PINN training. This training framework is designed with tensorized optical neural networks (TONN) for scalable inference acceleration and MZI phase-domain tuning for \textit{in-situ} optimization. Our simulation results of a 20-dim HJB PDE show that our photonic accelerator can reduce the number of MZIs by a factor of $1.17\times 10^3$, with only $1.36$ J and $1.15$ s to solve this equation. This is the first real-size optical PINN training framework that can be applied to solve high-dimensional PDEs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.00413v2-abstract-full').style.display = 'none'; document.getElementById('2401.00413v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ML with New Compute Paradigms (MLNCP) at NeurIPS 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.13311">arXiv:2312.13311</a> <span> [<a href="https://arxiv.org/pdf/2312.13311">pdf</a>, <a href="https://arxiv.org/format/2312.13311">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Unlocking Deep Learning: A BP-Free Approach for Parallel Block-Wise Training of Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Cheng%2C+A">Anzhe Cheng</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Z">Zhenkun Wang</a>, <a href="/search/eess?searchtype=author&query=Yin%2C+C">Chenzhong Yin</a>, <a href="/search/eess?searchtype=author&query=Cheng%2C+M">Mingxi Cheng</a>, <a href="/search/eess?searchtype=author&query=Ping%2C+H">Heng Ping</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+X">Xiongye Xiao</a>, <a href="/search/eess?searchtype=author&query=Nazarian%2C+S">Shahin Nazarian</a>, <a href="/search/eess?searchtype=author&query=Bogdan%2C+P">Paul Bogdan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.13311v1-abstract-short" style="display: inline;"> Backpropagation (BP) has been a successful optimization technique for deep learning models. 
However, its limitations, such as backward- and update-locking, and its biological implausibility, hinder the concurrent updating of layers and do not mimic the local learning processes observed in the human brain. To address these issues, recent research has suggested using local error signals to asynchron… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.13311v1-abstract-full').style.display = 'inline'; document.getElementById('2312.13311v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.13311v1-abstract-full" style="display: none;"> Backpropagation (BP) has been a successful optimization technique for deep learning models. However, its limitations, such as backward- and update-locking, and its biological implausibility, hinder the concurrent updating of layers and do not mimic the local learning processes observed in the human brain. To address these issues, recent research has suggested using local error signals to asynchronously train network blocks. However, this approach often involves extensive trial-and-error iterations to determine the best configuration for local training. This includes decisions on how to decouple network blocks and which auxiliary networks to use for each block. In our work, we introduce a novel BP-free approach: a block-wise BP-free (BWBPF) neural network that leverages local error signals to optimize distinct sub-neural networks separately, where the global loss is only responsible for updating the output layer. The local error signals used in the BP-free model can be computed in parallel, enabling a potential speed-up in the weight update process through parallel implementation. 
Our experimental results consistently show that this approach can identify transferable decoupled architectures for VGG and ResNet variations, outperforming models trained with end-to-end backpropagation and other state-of-the-art block-wise learning techniques on datasets such as CIFAR-10 and Tiny-ImageNet. The code is released at https://github.com/Belis0811/BWBPF. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.13311v1-abstract-full').style.display = 'none'; document.getElementById('2312.13311v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The paper has been accepted by ICASSP2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.06969">arXiv:2312.06969</a> <span> [<a href="https://arxiv.org/pdf/2312.06969">pdf</a>, <a href="https://arxiv.org/ps/2312.06969">ps</a>, <a href="https://arxiv.org/format/2312.06969">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Channel Estimation for Movable Antenna Communication Systems: A Framework Based on Compressed Sensing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xiao%2C+Z">Zhenyu Xiao</a>, <a href="/search/eess?searchtype=author&query=Cao%2C+S">Songqi Cao</a>, <a 
href="/search/eess?searchtype=author&query=Zhu%2C+L">Lipeng Zhu</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Y">Yanming Liu</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xiang-Gen Xia</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+R">Rui Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.06969v1-abstract-short" style="display: inline;"> Movable antenna (MA) is a new technology with great potential to improve communication performance by enabling local movement of antennas for pursuing better channel conditions. In particular, the acquisition of complete channel state information (CSI) between the transmitter (Tx) and receiver (Rx) regions is an essential problem for MA systems to reap performance gains. In this paper, we propose… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.06969v1-abstract-full').style.display = 'inline'; document.getElementById('2312.06969v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.06969v1-abstract-full" style="display: none;"> Movable antenna (MA) is a new technology with great potential to improve communication performance by enabling local movement of antennas for pursuing better channel conditions. In particular, the acquisition of complete channel state information (CSI) between the transmitter (Tx) and receiver (Rx) regions is an essential problem for MA systems to reap performance gains. In this paper, we propose a general channel estimation framework for MA systems by exploiting the multi-path field response channel structure. 
Specifically, the angles of departure (AoDs), angles of arrival (AoAs), and complex coefficients of the multi-path components (MPCs) are jointly estimated by employing the compressed sensing method, based on multiple channel measurements at designated positions of the Tx-MA and Rx-MA. Under this framework, the Tx-MA and Rx-MA measurement positions fundamentally determine the measurement matrix for compressed sensing, of which the mutual coherence is analyzed from the perspective of Fourier transform. Moreover, two criteria for MA measurement positions are provided to guarantee the successful recovery of MPCs. Then, we propose several MA measurement position setups and compare their performance. Finally, comprehensive simulation results show that the proposed framework is able to estimate the complete CSI between the Tx and Rx regions with a high accuracy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.06969v1-abstract-full').style.display = 'none'; document.getElementById('2312.06969v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.16565">arXiv:2311.16565</a> <span> [<a href="https://arxiv.org/pdf/2311.16565">pdf</a>, <a href="https://arxiv.org/format/2311.16565">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> DiffusionTalker: Personalization and Acceleration for Speech-Driven 3D Face Diffuser </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+P">Peng Chen</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+X">Xiaobao Wei</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+M">Ming Lu</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+Y">Yitong Zhu</a>, <a href="/search/eess?searchtype=author&query=Yao%2C+N">Naiming Yao</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+X">Xingyu Xiao</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+H">Hui Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.16565v2-abstract-short" style="display: inline;"> Speech-driven 3D facial animation has been an attractive task in both academia and industry. Traditional methods mostly focus on learning a deterministic mapping from speech to animation. Recent approaches start to consider the non-deterministic fact of speech-driven 3D face animation and employ the diffusion model for the task. 
However, personalizing facial animation and accelerating animation ge… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.16565v2-abstract-full').style.display = 'inline'; document.getElementById('2311.16565v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.16565v2-abstract-full" style="display: none;"> Speech-driven 3D facial animation has been an attractive task in both academia and industry. Traditional methods mostly focus on learning a deterministic mapping from speech to animation. Recent approaches start to consider the non-deterministic fact of speech-driven 3D face animation and employ the diffusion model for the task. However, personalizing facial animation and accelerating animation generation are still two major limitations of existing diffusion-based methods. To address the above limitations, we propose DiffusionTalker, a diffusion-based method that utilizes contrastive learning to personalize 3D facial animation and knowledge distillation to accelerate 3D animation generation. Specifically, to enable personalization, we introduce a learnable talking identity to aggregate knowledge in audio sequences. The proposed identity embeddings extract customized facial cues across different people in a contrastive learning manner. During inference, users can obtain personalized facial animation based on input audio, reflecting a specific talking style. With a trained diffusion model with hundreds of steps, we distill it into a lightweight model with 8 steps for acceleration. Extensive experiments are conducted to demonstrate that our method outperforms state-of-the-art methods. The code will be released. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.16565v2-abstract-full').style.display = 'none'; document.getElementById('2311.16565v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.11804">arXiv:2311.11804</a> <span> [<a href="https://arxiv.org/pdf/2311.11804">pdf</a>, <a href="https://arxiv.org/ps/2311.11804">ps</a>, <a href="https://arxiv.org/format/2311.11804">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Robust Multidimentional Chinese Remainder Theorem for Integer Vector Reconstruction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xiao%2C+L">Li Xiao</a>, <a href="/search/eess?searchtype=author&query=Huo%2C+H">Haiye Huo</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xiang-Gen Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.11804v1-abstract-short" style="display: inline;"> The problem of robustly reconstructing an integer vector from its erroneous remainders appears in many applications in the field of multidimensional (MD) signal processing. 
To address this problem, a robust MD Chinese remainder theorem (CRT) was recently proposed for a special class of moduli, where the remaining integer matrices left-divided by a greatest common left divisor (gcld) of all the mod… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.11804v1-abstract-full').style.display = 'inline'; document.getElementById('2311.11804v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.11804v1-abstract-full" style="display: none;"> The problem of robustly reconstructing an integer vector from its erroneous remainders appears in many applications in the field of multidimensional (MD) signal processing. To address this problem, a robust MD Chinese remainder theorem (CRT) was recently proposed for a special class of moduli, where the remaining integer matrices left-divided by a greatest common left divisor (gcld) of all the moduli are pairwise commutative and coprime. The strict constraint on the moduli limits the usefulness of the robust MD-CRT in practice. In this paper, we investigate the robust MD-CRT for a general set of moduli. We first introduce a necessary and sufficient condition on the difference between paired remainder errors, followed by a simple sufficient condition on the remainder error bound, for the robust MD-CRT for general moduli, where the conditions are associated with (the minimum distances of) these lattices generated by gcld's of paired moduli, and a closed-form reconstruction algorithm is presented. We then generalize the above results of the robust MD-CRT from integer vectors/matrices to real ones. Finally, we validate the robust MD-CRT for general moduli by employing numerical simulations, and apply it to MD sinusoidal frequency estimation based on multiple sub-Nyquist samplers. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.11804v1-abstract-full').style.display = 'none'; document.getElementById('2311.11804v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.10416">arXiv:2311.10416</a> <span> [<a href="https://arxiv.org/pdf/2311.10416">pdf</a>, <a href="https://arxiv.org/format/2311.10416">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Meta-DSP: A Meta-Learning Approach for Data-Driven Nonlinear Compensation in High-Speed Optical Fiber Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xiao%2C+X">Xinyu Xiao</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+Z">Zhennan Zhou</a>, <a href="/search/eess?searchtype=author&query=Dong%2C+B">Bin Dong</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+D">Dingjiong Ma</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+L">Li Zhou</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+J">Jie Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.10416v1-abstract-short" style="display: inline;"> Non-linear effects in long-haul, high-speed 
optical fiber systems significantly hinder channel capacity. While the Digital Backward Propagation algorithm (DBP) with adaptive filter (ADF) can mitigate these effects, it suffers from an overwhelming computational complexity. Recent solutions have incorporated deep neural networks in a data-driven strategy to alleviate this complexity in the DBP model… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.10416v1-abstract-full').style.display = 'inline'; document.getElementById('2311.10416v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.10416v1-abstract-full" style="display: none;"> Non-linear effects in long-haul, high-speed optical fiber systems significantly hinder channel capacity. While the Digital Backward Propagation algorithm (DBP) with adaptive filter (ADF) can mitigate these effects, it suffers from an overwhelming computational complexity. Recent solutions have incorporated deep neural networks in a data-driven strategy to alleviate this complexity in the DBP model. However, these models are often limited to a specific symbol rate and channel number, necessitating retraining for different settings, their performance declines significantly under high-speed and high-power conditions. We introduce Meta-DSP, a novel data-driven nonlinear compensation model based on meta-learning that processes multi-modal data across diverse transmission rates, power levels, and channel numbers. This not only enhances signal quality but also substantially reduces the complexity of the nonlinear processing algorithm. Our model delivers a 0.7 dB increase in the Q-factor over Electronic Dispersion Compensation (EDC), and compared to DBP, it curtails computational complexity by a factor of ten while retaining comparable performance. 
From the perspective of the entire signal processing system, the core idea of Meta-DSP can be employed in any segment of the overall communication system to enhance the model's scalability and generalization performance. Our research substantiates Meta-DSP's proficiency in addressing the critical parameters defining optical communication networks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.10416v1-abstract-full').style.display = 'none'; document.getElementById('2311.10416v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.05236">arXiv:2311.05236</a> <span> [<a href="https://arxiv.org/pdf/2311.05236">pdf</a>, <a href="https://arxiv.org/ps/2311.05236">ps</a>, <a href="https://arxiv.org/format/2311.05236">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Delay Doppler Transform </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xiang-Gen Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.05236v2-abstract-short" style="display: inline;"> This letter is to introduce delay Doppler transform (DDT) for a time domain signal. It is motivated by the recent studies in wireless communications over delay Doppler channels that have both time and Doppler spreads, such as, satellite communication channels. 
We present some simple properties of DDT as well. The DDT study may provide insights of delay Doppler channels. </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.05236v2-abstract-full" style="display: none;"> This letter is to introduce delay Doppler transform (DDT) for a time domain signal. It is motivated by the recent studies in wireless communications over delay Doppler channels that have both time and Doppler spreads, such as, satellite communication channels. We present some simple properties of DDT as well. The DDT study may provide insights of delay Doppler channels. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.05236v2-abstract-full').style.display = 'none'; document.getElementById('2311.05236v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.04992">arXiv:2310.04992</a> <span> [<a href="https://arxiv.org/pdf/2310.04992">pdf</a>, <a href="https://arxiv.org/format/2310.04992">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> VisionFM: a Multi-Modal Multi-Task Vision Foundation Model for Generalist Ophthalmic Artificial Intelligence </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Qiu%2C+J">Jianing Qiu</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+J">Jian Wu</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+H">Hao Wei</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+P">Peilun Shi</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+M">Minqing Zhang</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+Y">Yunyun Sun</a>, <a href="/search/eess?searchtype=author&query=Li%2C+L">Lin Li</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+H">Hanruo Liu</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+H">Hongyi Liu</a>, <a href="/search/eess?searchtype=author&query=Hou%2C+S">Simeng Hou</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+Y">Yuyang Zhao</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+X">Xuehui Shi</a>, <a href="/search/eess?searchtype=author&query=Xian%2C+J">Junfang Xian</a>, <a href="/search/eess?searchtype=author&query=Qu%2C+X">Xiaoxia Qu</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+S">Sirui Zhu</a>, <a href="/search/eess?searchtype=author&query=Pan%2C+L">Lijie Pan</a>, <a 
href="/search/eess?searchtype=author&query=Chen%2C+X">Xiaoniao Chen</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+X">Xiaojia Zhang</a>, <a href="/search/eess?searchtype=author&query=Jiang%2C+S">Shuai Jiang</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+K">Kebing Wang</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+C">Chenlong Yang</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+M">Mingqiang Chen</a>, <a href="/search/eess?searchtype=author&query=Fan%2C+S">Sujie Fan</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+J">Jianhua Hu</a>, <a href="/search/eess?searchtype=author&query=Lv%2C+A">Aiguo Lv</a> , et al. (17 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.04992v1-abstract-short" style="display: inline;"> We present VisionFM, a foundation model pre-trained with 3.4 million ophthalmic images from 560,457 individuals, covering a broad range of ophthalmic diseases, modalities, imaging devices, and demography. After pre-training, VisionFM provides a foundation to foster multiple ophthalmic artificial intelligence (AI) applications, such as disease screening and diagnosis, disease prognosis, subclassifi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.04992v1-abstract-full').style.display = 'inline'; document.getElementById('2310.04992v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.04992v1-abstract-full" style="display: none;"> We present VisionFM, a foundation model pre-trained with 3.4 million ophthalmic images from 560,457 individuals, covering a broad range of ophthalmic diseases, modalities, imaging devices, and demography. 
After pre-training, VisionFM provides a foundation to foster multiple ophthalmic artificial intelligence (AI) applications, such as disease screening and diagnosis, disease prognosis, subclassification of disease phenotype, and systemic biomarker and disease prediction, with each application enhanced with expert-level intelligence and accuracy. The generalist intelligence of VisionFM outperformed ophthalmologists with basic and intermediate levels in jointly diagnosing 12 common ophthalmic diseases. Evaluated on a new large-scale ophthalmic disease diagnosis benchmark database, as well as a new large-scale segmentation and detection benchmark database, VisionFM outperformed strong baseline deep neural networks. The ophthalmic image representations learned by VisionFM exhibited noteworthy explainability, and demonstrated strong generalizability to new ophthalmic modalities, disease spectrum, and imaging devices. As a foundation model, VisionFM has a large capacity to learn from diverse ophthalmic imaging data and disparate datasets. To be commensurate with this capacity, in addition to the real data used for pre-training, we also generated and leveraged synthetic ophthalmic imaging data. Experimental results revealed that synthetic data that passed visual Turing tests, can also enhance the representation learning capability of VisionFM, leading to substantial performance gains on downstream ophthalmic AI tasks. Beyond the ophthalmic AI applications developed, validated, and demonstrated in this work, substantial further applications can be achieved in an efficient and cost-effective manner using VisionFM as the foundation. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.04992v1-abstract-full').style.display = 'none'; document.getElementById('2310.04992v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.04715">arXiv:2310.04715</a> <span> [<a href="https://arxiv.org/pdf/2310.04715">pdf</a>, <a href="https://arxiv.org/format/2310.04715">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> An Exploration of Task-decoupling on Two-stage Neural Post Filter for Real-time Personalized Acoustic Echo Cancellation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhang%2C+Z">Zihan Zhang</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+J">Jiayao Sun</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xianjun Xia</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Z">Ziqian Wang</a>, <a href="/search/eess?searchtype=author&query=Yan%2C+X">Xiaopeng Yan</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+Y">Yijian Xiao</a>, <a href="/search/eess?searchtype=author&query=Xie%2C+L">Lei Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.04715v1-abstract-short" style="display: inline;"> Deep learning based techniques have 
been popularly adopted in acoustic echo cancellation (AEC). Utilization of speaker representation has extended the frontier of AEC, thus attracting many researchers' interest in personalized acoustic echo cancellation (PAEC). Meanwhile, task-decoupling strategies are widely adopted in speech enhancement. To further explore the task-decoupling approach, we propos… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.04715v1-abstract-full').style.display = 'inline'; document.getElementById('2310.04715v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.04715v1-abstract-full" style="display: none;"> Deep learning based techniques have been popularly adopted in acoustic echo cancellation (AEC). Utilization of speaker representation has extended the frontier of AEC, thus attracting many researchers' interest in personalized acoustic echo cancellation (PAEC). Meanwhile, task-decoupling strategies are widely adopted in speech enhancement. To further explore the task-decoupling approach, we propose to use a two-stage task-decoupling post-filter (TDPF) in PAEC. Furthermore, a multi-scale local-global speaker representation is applied to improve speaker extraction in PAEC. Experimental results indicate that the task-decoupling model can yield better performance than a single joint network. The optimal approach is to decouple the echo cancellation from noise and interference speech suppression. Based on the task-decoupling sequence, optimal training strategies for the two-stage model are explored afterwards. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.04715v1-abstract-full').style.display = 'none'; document.getElementById('2310.04715v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted to ASRU 2023</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Xiao%2C+X&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Xiao%2C+X&start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Xiao%2C+X&start=50" class="pagination-link " aria-label="Goto page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Xiao%2C+X&start=100" class="pagination-link " aria-label="Goto page 3">3 </a> </li> </ul> </nav> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" 
viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 
0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 
21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository