CINXE.COM

Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script 
src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. 
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" 
role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 133 results for author: <span class="mathjax">Xia, X</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/eess" aria-role="search"> Searching in archive <strong>eess</strong>. <a href="/search/?searchtype=author&amp;query=Xia%2C+X">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Xia, X"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Xia%2C+X&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option 
value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Xia, X"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Xia%2C+X&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Xia%2C+X&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Xia%2C+X&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Xia%2C+X&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.09436">arXiv:2410.09436</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.09436">pdf</a>, <a href="https://arxiv.org/ps/2410.09436">ps</a>, <a href="https://arxiv.org/format/2410.09436">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Sum Rate Maximization for Movable Antenna 
Enhanced Multiuser Covert Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Mao%2C+H">Haobin Mao</a>, <a href="/search/eess?searchtype=author&amp;query=Pi%2C+X">Xiangyu Pi</a>, <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+L">Lipeng Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+Z">Zhenyu Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xiang-Gen Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+R">Rui Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.09436v2-abstract-short" style="display: inline;"> In this letter, we propose to employ movable antenna (MA) to enhance covert communications with noise uncertainty, where the confidential data is transmitted from an MA-aided access point (AP) to multiple users with a warden attempting to detect the existence of the legal transmission. To maximize the sum rate of users under covertness constraint, we formulate an optimization problem to jointly de&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09436v2-abstract-full').style.display = 'inline'; document.getElementById('2410.09436v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.09436v2-abstract-full" style="display: none;"> In this letter, we propose to employ movable antenna (MA) to enhance covert communications with noise uncertainty, where the confidential data is transmitted from an MA-aided access point (AP) to multiple users with a warden attempting to detect the existence of the legal transmission. 
To maximize the sum rate of users under covertness constraint, we formulate an optimization problem to jointly design the transmit beamforming and the positions of MAs at the AP. To solve the formulated non-convex optimization problem, we develop a block successive upper-bound minimization (BSUM) based algorithm, where the proximal distance algorithm (PDA) and the successive convex approximation (SCA) are employed to optimize the transmit beamforming and the MAs&#39; positions, respectively. Simulation results show that the proposed MAs-aided system can significantly increase the covert sum rate via antenna position optimization as compared to conventional systems with fixed-position antennas (FPAs). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09436v2-abstract-full').style.display = 'none'; document.getElementById('2410.09436v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 5 figures (subfigures included), submitted to an IEEE journal for possible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.03559">arXiv:2410.03559</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.03559">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> </div> </div> <p class="title is-5 mathjax"> Optimizing food taste sensory evaluation through neural network-based taste electroencephalogram channel selection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xiuxin Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Q">Qun Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+H">He Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+C">Chenrui Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+P">Pengwei Li</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yan Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Men%2C+H">Hong Men</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.03559v1-abstract-short" style="display: inline;"> The taste electroencephalogram (EEG) evoked by the taste 
stimulation can reflect different brain patterns and be used in applications such as sensory evaluation of food. However, considering the computational cost and efficiency, EEG data with many channels has to face the critical issue of channel selection. This paper proposed a channel selection method called class activation mapping with atten&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03559v1-abstract-full').style.display = 'inline'; document.getElementById('2410.03559v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.03559v1-abstract-full" style="display: none;"> The taste electroencephalogram (EEG) evoked by the taste stimulation can reflect different brain patterns and be used in applications such as sensory evaluation of food. However, considering the computational cost and efficiency, EEG data with many channels has to face the critical issue of channel selection. This paper proposed a channel selection method called class activation mapping with attention (CAM-Attention). The CAM-Attention method combined a convolutional neural network with channel and spatial attention (CNN-CSA) model with a gradient-weighted class activation mapping (Grad-CAM) model. The CNN-CSA model exploited key features in EEG data by attention mechanism, and the Grad-CAM model effectively realized the visualization of feature regions. Then, channel selection was effectively implemented based on feature regions. Finally, the CAM-Attention method reduced the computational burden of taste EEG recognition and effectively distinguished the four tastes. In short, it has excellent recognition performance and provides effective technical support for taste sensory evaluation. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03559v1-abstract-full').style.display = 'none'; document.getElementById('2410.03559v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">33 pages, 13 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.19346">arXiv:2409.19346</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.19346">pdf</a>, <a href="https://arxiv.org/ps/2409.19346">ps</a>, <a href="https://arxiv.org/format/2409.19346">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Channel Estimation for Movable Antenna Aided Wideband Communication Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+Z">Zhenyu Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Cao%2C+S">Songqi Cao</a>, <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+L">Lipeng Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Ning%2C+B">Boyu Ning</a>, <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xiang-Gen Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+R">Rui Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.19346v1-abstract-short" 
style="display: inline;"> Movable antenna (MA) is an emerging technology that can significantly improve communication performance via the continuous adjustment of the antenna positions. To unleash the potential of MAs in wideband communication systems, acquiring accurate channel state information (CSI), i.e., the channel frequency responses (CFRs) between any position pair within the transmit (Tx) region and the receive (R&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19346v1-abstract-full').style.display = 'inline'; document.getElementById('2409.19346v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.19346v1-abstract-full" style="display: none;"> Movable antenna (MA) is an emerging technology that can significantly improve communication performance via the continuous adjustment of the antenna positions. To unleash the potential of MAs in wideband communication systems, acquiring accurate channel state information (CSI), i.e., the channel frequency responses (CFRs) between any position pair within the transmit (Tx) region and the receive (Rx) region across all subcarriers, is a crucial issue. In this paper, we study the channel estimation problem for wideband MA systems. To start with, we express the CFRs as a combination of the field-response vectors (FRVs), delay-response vector (DRV), and path-response tensor (PRT), which exhibit sparse characteristics and can be recovered by using a limited number of channel measurements at selected position pairs of Tx and Rx MAs over a few subcarriers. Specifically, we first formulate the recovery of the FRVs and DRV as a problem with multiple measurement vectors in compressed sensing (MMV-CS), which can be solved via a simultaneous orthogonal matching pursuit (SOMP) algorithm. Next, we estimate the PRT using the least-square (LS) method. 
Moreover, we also devise an alternating refinement approach to further improve the accuracy of the estimated FRVs, DRV, and PRT. This is achieved by minimizing the discrepancy between the received pilots and those constructed by the estimated CSI, which can be efficiently carried out by using the gradient descent algorithm. Finally, simulation results demonstrate that both the SOMP-based channel estimation method and alternating refinement method can reconstruct the complete wideband CSI with high accuracy, where the alternating refinement method performs better despite a higher complexity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19346v1-abstract-full').style.display = 'none'; document.getElementById('2409.19346v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.16301">arXiv:2409.16301</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.16301">pdf</a>, <a href="https://arxiv.org/format/2409.16301">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Gait Switching and Enhanced Stabilization of Walking Robots with Deep Learning-based Reachability: A Case Study on Two-link Walker </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xingpeng Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Choi%2C+J+J">Jason J. Choi</a>, <a href="/search/eess?searchtype=author&amp;query=Agrawal%2C+A">Ayush Agrawal</a>, <a href="/search/eess?searchtype=author&amp;query=Sreenath%2C+K">Koushil Sreenath</a>, <a href="/search/eess?searchtype=author&amp;query=Tomlin%2C+C+J">Claire J. Tomlin</a>, <a href="/search/eess?searchtype=author&amp;query=Bansal%2C+S">Somil Bansal</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.16301v1-abstract-short" style="display: inline;"> Learning-based approaches have recently shown notable success in legged locomotion. However, these approaches often lack accountability, necessitating empirical tests to determine their effectiveness. In this work, we are interested in designing a learning-based locomotion controller whose stability can be examined and guaranteed. 
This can be achieved by verifying regions of attraction (RoAs) of l&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.16301v1-abstract-full').style.display = 'inline'; document.getElementById('2409.16301v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.16301v1-abstract-full" style="display: none;"> Learning-based approaches have recently shown notable success in legged locomotion. However, these approaches often lack accountability, necessitating empirical tests to determine their effectiveness. In this work, we are interested in designing a learning-based locomotion controller whose stability can be examined and guaranteed. This can be achieved by verifying regions of attraction (RoAs) of legged robots to their stable walking gaits. This is a non-trivial problem for legged robots due to their hybrid dynamics. Although previous work has shown the utility of Hamilton-Jacobi (HJ) reachability to solve this problem, its practicality was limited by its poor scalability. The core contribution of our work is the employment of a deep learning-based HJ reachability solution to the hybrid legged robot dynamics, which overcomes the previous work&#39;s limitation. With the learned reachability solution, first, we can estimate a library of RoAs for various gaits. Second, we can design a one-step predictive controller that effectively stabilizes to an individual gait within the verified RoA. Finally, we can devise a strategy that switches gaits, in response to external perturbations, whose feasibility is guided by the RoA analysis. We demonstrate our method in a two-link walker simulation, whose mathematical model is well established. Our method achieves improved stability than previous model-based methods, while ensuring transparency that was not present in the existing learning-based approaches. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.16301v1-abstract-full').style.display = 'none'; document.getElementById('2409.16301v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The first two authors contributed equally. This work is supported in part by the NSF Grant CMMI-1944722, the NSF CAREER Program under award 2240163, the NASA ULI on Safe Aviation Autonomy, and the DARPA Assured Autonomy and Assured Neuro Symbolic Learning and Reasoning (ANSR) programs. The work of Jason J. Choi received the support of a fellowship from Kwanjeong Educational Foundation, Korea</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.03005">arXiv:2409.03005</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.03005">pdf</a>, <a href="https://arxiv.org/format/2409.03005">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> PIETRA: Physics-Informed Evidential Learning for Traversing Out-of-Distribution Terrain </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Cai%2C+X">Xiaoyi Cai</a>, <a 
href="/search/eess?searchtype=author&amp;query=Queeney%2C+J">James Queeney</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+T">Tong Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Datar%2C+A">Aniket Datar</a>, <a href="/search/eess?searchtype=author&amp;query=Pan%2C+C">Chenhui Pan</a>, <a href="/search/eess?searchtype=author&amp;query=Miller%2C+M">Max Miller</a>, <a href="/search/eess?searchtype=author&amp;query=Flather%2C+A">Ashton Flather</a>, <a href="/search/eess?searchtype=author&amp;query=Osteen%2C+P+R">Philip R. Osteen</a>, <a href="/search/eess?searchtype=author&amp;query=Roy%2C+N">Nicholas Roy</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+X">Xuesu Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=How%2C+J+P">Jonathan P. How</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.03005v1-abstract-short" style="display: inline;"> Self-supervised learning is a powerful approach for developing traversability models for off-road navigation, but these models often struggle with inputs unseen during training. Existing methods utilize techniques like evidential deep learning to quantify model uncertainty, helping to identify and avoid out-of-distribution terrain. However, always avoiding out-of-distribution terrain can be overly&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.03005v1-abstract-full').style.display = 'inline'; document.getElementById('2409.03005v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.03005v1-abstract-full" style="display: none;"> Self-supervised learning is a powerful approach for developing traversability models for off-road navigation, but these models often struggle with inputs unseen during training. 
Existing methods utilize techniques like evidential deep learning to quantify model uncertainty, helping to identify and avoid out-of-distribution terrain. However, always avoiding out-of-distribution terrain can be overly conservative, e.g., when novel terrain can be effectively analyzed using a physics-based model. To overcome this challenge, we introduce Physics-Informed Evidential Traversability (PIETRA), a self-supervised learning framework that integrates physics priors directly into the mathematical formulation of evidential neural networks and introduces physics knowledge implicitly through an uncertainty-aware, physics-informed training loss. Our evidential network seamlessly transitions between learned and physics-based predictions for out-of-distribution inputs. Additionally, the physics-informed loss regularizes the learned model, ensuring better alignment with the physics model. Extensive simulations and hardware experiments demonstrate that PIETRA improves both learning accuracy and navigation performance in environments with significant distribution shifts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.03005v1-abstract-full').style.display = 'none'; document.getElementById('2409.03005v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to RA-L. 
Video: https://youtu.be/OTnNZ96oJRk</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.01695">arXiv:2409.01695</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.01695">pdf</a>, <a href="https://arxiv.org/format/2409.01695">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> USTC-KXDIGIT System Description for ASVspoof5 Challenge </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Chen%2C+Y">Yihao Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+H">Haochen Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+N">Nan Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xiang Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Gu%2C+Q">Qing Gu</a>, <a href="/search/eess?searchtype=author&amp;query=Hao%2C+Y">Yunqi Hao</a>, <a href="/search/eess?searchtype=author&amp;query=Cai%2C+P">Pengfei Cai</a>, <a href="/search/eess?searchtype=author&amp;query=Guan%2C+Y">Yu Guan</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+J">Jialong Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Xie%2C+W">Weilin Xie</a>, <a href="/search/eess?searchtype=author&amp;query=Fang%2C+L">Lei Fang</a>, <a href="/search/eess?searchtype=author&amp;query=Fang%2C+S">Sian Fang</a>, <a href="/search/eess?searchtype=author&amp;query=Song%2C+Y">Yan Song</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Wu Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+L">Lin Liu</a>, 
<a href="/search/eess?searchtype=author&amp;query=Xu%2C+M">Minqiang Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.01695v1-abstract-short" style="display: inline;"> This paper describes the USTC-KXDIGIT system submitted to the ASVspoof5 Challenge for Track 1 (speech deepfake detection) and Track 2 (spoofing-robust automatic speaker verification, SASV). Track 1 showcases a diverse range of technical qualities from potential processing algorithms and includes both open and closed conditions. For these conditions, our system consists of a cascade of a frontend f&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01695v1-abstract-full').style.display = 'inline'; document.getElementById('2409.01695v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.01695v1-abstract-full" style="display: none;"> This paper describes the USTC-KXDIGIT system submitted to the ASVspoof5 Challenge for Track 1 (speech deepfake detection) and Track 2 (spoofing-robust automatic speaker verification, SASV). Track 1 showcases a diverse range of technical qualities from potential processing algorithms and includes both open and closed conditions. For these conditions, our system consists of a cascade of a frontend feature extractor and a back-end classifier. We focus on extensive embedding engineering and enhancing the generalization of the back-end classifier model. Specifically, the embedding engineering is based on hand-crafted features and speech representations from a self-supervised model, used for closed and open conditions, respectively. To detect spoof attacks under various adversarial conditions, we trained multiple systems on an augmented training set. 
Additionally, we used voice conversion technology to synthesize fake audio from genuine audio in the training set to enrich the synthesis algorithms. To leverage the complementary information learned by different model architectures, we employed activation ensemble and fused scores from different systems to obtain the final decision score for spoof detection. During the evaluation phase, the proposed methods achieved 0.3948 minDCF and 14.33% EER in the close condition, and 0.0750 minDCF and 2.59% EER in the open condition, demonstrating the robustness of our submitted systems under adversarial conditions. In Track 2, we continued using the CM system from Track 1 and fused it with a CNN-based ASV system. This approach achieved 0.2814 min-aDCF in the closed condition and 0.0756 min-aDCF in the open condition, showcasing superior performance in the SASV system. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01695v1-abstract-full').style.display = 'none'; document.getElementById('2409.01695v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ASVspoof5 workshop paper</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.07264">arXiv:2408.07264</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.07264">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1002/ima.22933">10.1002/ima.22933 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Lesion-aware network for diabetic retinopathy diagnosis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xue Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Zhan%2C+K">Kun Zhan</a>, <a href="/search/eess?searchtype=author&amp;query=Fang%2C+Y">Yuming Fang</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+W">Wenhui Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Shen%2C+F">Fei Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.07264v1-abstract-short" style="display: inline;"> Deep learning brought boosts to auto diabetic retinopathy (DR) diagnosis, thus, greatly helping ophthalmologists for early disease detection, which contributes to preventing disease deterioration 
that may eventually lead to blindness. It has been proved that convolutional neural network (CNN)-aided lesion identifying or segmentation benefits auto DR screening. The key to fine-grained lesion tasks&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.07264v1-abstract-full').style.display = 'inline'; document.getElementById('2408.07264v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.07264v1-abstract-full" style="display: none;"> Deep learning brought boosts to auto diabetic retinopathy (DR) diagnosis, thus, greatly helping ophthalmologists for early disease detection, which contributes to preventing disease deterioration that may eventually lead to blindness. It has been proved that convolutional neural network (CNN)-aided lesion identifying or segmentation benefits auto DR screening. The key to fine-grained lesion tasks mainly lies in: (1) extracting features being both sensitive to tiny lesions and robust against DR-irrelevant interference, and (2) exploiting and re-using encoded information to restore lesion locations under extremely imbalanced data distribution. To this end, we propose a CNN-based DR diagnosis network with attention mechanism involved, termed lesion-aware network, to better capture lesion information from imbalanced data. Specifically, we design the lesion-aware module (LAM) to capture noise-like lesion areas across deeper layers, and the feature-preserve module (FPM) to assist shallow-to-deep feature fusion. Afterward, the proposed lesion-aware network (LANet) is constructed by embedding the LAM and FPM into the CNN decoders for DR-related information utilization. The proposed LANet is then further extended to a DR screening network by adding a classification layer. 
Through experiments on three public fundus datasets with pixel-level annotations, our method outperforms the mainstream methods with an area under curve of 0.967 in DR screening, and increases the overall average precision by 7.6%, 2.1%, and 1.2% in lesion segmentation on three datasets. Besides, the ablation study validates the effectiveness of the proposed sub-modules. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.07264v1-abstract-full').style.display = 'none'; document.getElementById('2408.07264v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This is the submitted version without improvements by reviewers. 
The final version is published on International Journal of Imaging Systems and Technology (https://onlinelibrary.wiley.com/doi/10.1002/ima.22933)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.06588">arXiv:2408.06588</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.06588">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Orbital-Angular-Momentum Versus MIMO: Orthogonality, Degree of Freedom, and Capacity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Jing%2C+H">Haiyue Jing</a>, <a href="/search/eess?searchtype=author&amp;query=Cheng%2C+W">Wenchi Cheng</a>, <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xiang-Gen Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+H">Hailin Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.06588v1-abstract-short" style="display: inline;"> The plane wave based wireless communications have becoming more and more matured, along with the well utilization of the traditional resources such as time and frequency. To further increase the capacity for rapidly increasing capacity demand of wireless communications, it is potential to use the twist wave, which has the orbital angular momentum (OAM). 
In this paper, we discuss the OAM based wire&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.06588v1-abstract-full').style.display = 'inline'; document.getElementById('2408.06588v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.06588v1-abstract-full" style="display: none;"> The plane wave based wireless communications have becoming more and more matured, along with the well utilization of the traditional resources such as time and frequency. To further increase the capacity for rapidly increasing capacity demand of wireless communications, it is potential to use the twist wave, which has the orbital angular momentum (OAM). In this paper, we discuss the OAM based wireless communications in the aspect of orthogonality, degree of freedom (DoF), and capacity, where both the transmitter and the receiver use uniform circular array (UCA) antennas. In particular, we compare OAM based wireless communications with multiple-input-multiple-output (MIMO) based wireless communications in terms of DoF and capacity. Numerical results are presented to validate and evaluate that the DoF of OAM based wireless communications is greater than or equal to that of correlated MIMO based wireless communications when the transmitter and the receiver antennas are aligned well. The OAM based wireless communications can achieve larger capacity than the correlated MIMO in high signal-to-noise ratio (SNR) region under line-of-sight scenario. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.06588v1-abstract-full').style.display = 'none'; document.getElementById('2408.06588v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.06584">arXiv:2408.06584</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.06584">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Fast Transceiver Design for RIS-Assisted MIMO mmWave Wireless Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Jing%2C+H">Haiyue Jing</a>, <a href="/search/eess?searchtype=author&amp;query=Cheng%2C+W">Wenchi Cheng</a>, <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xiang-Gen Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.06584v1-abstract-short" style="display: inline;"> Due to high bandwidth and small antenna size, millimeter-wave (mmWave) integrated line-of-sight (LOS) multiple-input-multiple-output (MIMO) systems have attracted much attention. 
Reconfigurable intelligent surfaces (RISs), which have the potential to change the characteristics of incident electromagnetic waves with low power cost, can improve the performance or the MIMO mmWave wireless communicati&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.06584v1-abstract-full').style.display = 'inline'; document.getElementById('2408.06584v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.06584v1-abstract-full" style="display: none;"> Due to high bandwidth and small antenna size, millimeter-wave (mmWave) integrated line-of-sight (LOS) multiple-input-multiple-output (MIMO) systems have attracted much attention. Reconfigurable intelligent surfaces (RISs), which have the potential to change the characteristics of incident electromagnetic waves with low power cost, can improve the performance or the MIMO mmWave wireless communications. Uniform circular array (UCA) is an effective antenna structure with low complexity transceiver. In this paper, UCA based RIS-assisted MIMO mmWave wireless communications with transmit UCA, the RIS UCAs, and receive UCA are investigated. Since the rotation angles between the transceiver make the channel matrix noncirculant, an algorithm is developed to derive the ranges of the rotation angles based on an acceptable error and reduce the impact of rotation angles on channel matrix. Then, we propose a low-complexity precoding scheme at the transmitter, phase designs at the RIS UCAs, and a phase compensation scheme at the receiver, which can convert the channel matrix into an equivalent circulant channel matrix with a small error. Then, a fast symbol-wise maximum likelihood (ML) detection scheme is proposed to recover the signals with low computational complexity. Simulation results are presented to illustrate the theory. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.06584v1-abstract-full').style.display = 'none'; document.getElementById('2408.06584v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.19503">arXiv:2407.19503</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.19503">pdf</a>, <a href="https://arxiv.org/ps/2407.19503">ps</a>, <a href="https://arxiv.org/format/2407.19503">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Discrete Spectrum Analysis of Vector OFDM Signals </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xiang-Gen Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+W">Wei Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.19503v1-abstract-short" style="display: inline;"> Vector OFDM (VOFDM) is equivalent to OTFS and is good for time-varying channels. However, due to its vector form, its signal spectrum is not as clear as that of the conventional OFDM. In this paper, we study the discrete spectrum of discrete VOFDM signals. 
We obtain a linear relationship between a vector of information symbols and a vector of the same size of components evenly distributed in the d&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.19503v1-abstract-full').style.display = 'inline'; document.getElementById('2407.19503v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.19503v1-abstract-full" style="display: none;"> Vector OFDM (VOFDM) is equivalent to OTFS and is good for time-varying channels. However, due to its vector form, its signal spectrum is not as clear as that of the conventional OFDM. In this paper, we study the discrete spectrum of discrete VOFDM signals. We obtain a linear relationship between a vector of information symbols and a vector of the same size of components evenly distributed in the discrete VOFDM signal spectrum, and show that if a vector of information symbols is set to 0, then a corresponding vector of the same size of the discrete VOFDM signal spectrum is 0 as well, where the components of the 0 vector are not together but evenly distributed in the spectrum. With the linear relationship, the information symbol vectors can be locally precoded so that any of the discrete spectrum of VOFDM signals can be set to 0, similar to that of the conventional OFDM signals. These results are verified by simulations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.19503v1-abstract-full').style.display = 'none'; document.getElementById('2407.19503v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.17882">arXiv:2407.17882</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.17882">pdf</a>, <a href="https://arxiv.org/format/2407.17882">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Artificial Immunofluorescence in a Flash: Rapid Synthetic Imaging from Brightfield Through Residual Diffusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xing%2C+X">Xiaodan Xing</a>, <a href="/search/eess?searchtype=author&amp;query=Tang%2C+C">Chunling Tang</a>, <a href="/search/eess?searchtype=author&amp;query=Murdoch%2C+S">Siofra Murdoch</a>, <a href="/search/eess?searchtype=author&amp;query=Papanastasiou%2C+G">Giorgos Papanastasiou</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+Y">Yunzhe Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+X">Xianglu Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Cross-Zamirski%2C+J">Jan Cross-Zamirski</a>, <a href="/search/eess?searchtype=author&amp;query=Sch%C3%B6nlieb%2C+C">Carola-Bibiane Schönlieb</a>, <a href="/search/eess?searchtype=author&amp;query=Liang%2C+K+X">Kristina Xiao Liang</a>, <a href="/search/eess?searchtype=author&amp;query=Niu%2C+Z">Zhangming Niu</a>, <a href="/search/eess?searchtype=author&amp;query=Fang%2C+E+F">Evandro Fei Fang</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Y">Yinhai Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.17882v1-abstract-short" style="display: inline;"> 
Immunofluorescent (IF) imaging is crucial for visualizing biomarker expressions, cell morphology and assessing the effects of drug treatments on sub-cellular components. IF imaging needs extra staining process and often requiring cell fixation, therefore it may also introduce artefects and alter endogenouous cell morphology. Some IF stains are expensive or not readily available hence hindering exp&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.17882v1-abstract-full').style.display = 'inline'; document.getElementById('2407.17882v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.17882v1-abstract-full" style="display: none;"> Immunofluorescent (IF) imaging is crucial for visualizing biomarker expressions, cell morphology and assessing the effects of drug treatments on sub-cellular components. IF imaging needs extra staining process and often requiring cell fixation, therefore it may also introduce artefects and alter endogenouous cell morphology. Some IF stains are expensive or not readily available hence hindering experiments. Recent diffusion models, which synthesise high-fidelity IF images from easy-to-acquire brightfield (BF) images, offer a promising solution but are hindered by training instability and slow inference times due to the noise diffusion process. This paper presents a novel method for the conditional synthesis of IF images directly from BF images along with cell segmentation masks. Our approach employs a Residual Diffusion process that enhances stability and significantly reduces inference time. We performed a critical evaluation against other image-to-image synthesis models, including UNets, GANs, and advanced diffusion models. 
Our model demonstrates significant improvements in image quality (p&lt;0.05 in MSE, PSNR, and SSIM), inference speed (26 times faster than competing diffusion models), and accurate segmentation results for both nuclei and cell bodies (0.77 and 0.63 mean IOU for nuclei and cell true positives, respectively). This paper is a substantial advancement in the field, providing robust and efficient tools for cell image analysis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.17882v1-abstract-full').style.display = 'none'; document.getElementById('2407.17882v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.10848">arXiv:2407.10848</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.10848">pdf</a>, <a href="https://arxiv.org/format/2407.10848">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TWC.2024.3429495">10.1109/TWC.2024.3429495 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> On the Spectral Efficiency of Multi-user Holographic MIMO Uplink Transmission </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/eess?searchtype=author&amp;query=Qian%2C+M">Mengyu Qian</a>, <a href="/search/eess?searchtype=author&amp;query=You%2C+L">Li You</a>, <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xiang-Gen Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Gao%2C+X">Xiqi Gao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.10848v1-abstract-short" style="display: inline;"> With antenna spacing much less than half a wavelength in confined space, holographic multiple-input multiple-output (HMIMO) technology presents a promising frontier in next-generation mobile communication. We delve into the research of the multi-user uplink transmission with both the base station and the users equipped with holographic planar arrays. To begin, we construct an HMIMO channel model u&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.10848v1-abstract-full').style.display = 'inline'; document.getElementById('2407.10848v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.10848v1-abstract-full" style="display: none;"> With antenna spacing much less than half a wavelength in confined space, holographic multiple-input multiple-output (HMIMO) technology presents a promising frontier in next-generation mobile communication. We delve into the research of the multi-user uplink transmission with both the base station and the users equipped with holographic planar arrays. To begin, we construct an HMIMO channel model utilizing electromagnetic field equations, accompanied by a colored noise model that accounts for both electromagnetic interference and hardware noise. 
Since this model is continuous, we approximate it within a finite-dimensional space spanned by Fourier space series, which can be defined as the communication mode functions. We show that this channel model samples Green&#39;s function in the wavenumber domain in different communication modes. Subsequently, we tackle the challenging task of maximizing the spectral efficiency (SE) of the system, which involves optimizing the continuous current density function (CDF) for each user. Using the aforementioned approximation model, we transform the optimization variables into expansion coefficients of the CDFs on a finite-dimensional space, for which we propose an iterative water-filling algorithm. Simulation results illustrate the efficacy of the proposed algorithm in enhancing the system SE and show the influence of the colored noise and the system parameters on the SE. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.10848v1-abstract-full').style.display = 'none'; document.getElementById('2407.10848v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 7 figures, to appear in IEEE Transactions on Wireless Communications</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Wireless Communications, vol. 23, no. 10, pp. 15421-15434, Oct. 
2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.09507">arXiv:2407.09507</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.09507">pdf</a>, <a href="https://arxiv.org/format/2407.09507">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Can Generative AI Replace Immunofluorescent Staining Processes? A Comparison Study of Synthetically Generated CellPainting Images from Brightfield </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xing%2C+X">Xiaodan Xing</a>, <a href="/search/eess?searchtype=author&amp;query=Murdoch%2C+S">Siofra Murdoch</a>, <a href="/search/eess?searchtype=author&amp;query=Tang%2C+C">Chunling Tang</a>, <a href="/search/eess?searchtype=author&amp;query=Papanastasiou%2C+G">Giorgos Papanastasiou</a>, <a href="/search/eess?searchtype=author&amp;query=Cross-Zamirski%2C+J">Jan Cross-Zamirski</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+Y">Yunzhe Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+X">Xianglu Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Sch%C3%B6nlieb%2C+C">Carola-Bibiane Schönlieb</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Y">Yinhai Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.09507v2-abstract-short" style="display: inline;"> Cell imaging assays utilizing fluorescence stains are essential for observing sub-cellular organelles and their responses to perturbations. 
Immunofluorescent staining process is routinely in labs, however the recent innovations in generative AI is challenging the idea of IF staining are required. This is especially true when the availability and cost of specific fluorescence dyes is a problem to s&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.09507v2-abstract-full').style.display = 'inline'; document.getElementById('2407.09507v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.09507v2-abstract-full" style="display: none;"> Cell imaging assays utilizing fluorescence stains are essential for observing sub-cellular organelles and their responses to perturbations. Immunofluorescent staining process is routinely in labs, however the recent innovations in generative AI is challenging the idea of IF staining are required. This is especially true when the availability and cost of specific fluorescence dyes is a problem to some labs. Furthermore, staining process takes time and leads to inter-intra technician and hinders downstream image and data analysis, and the reusability of image data for other projects. Recent studies showed the use of generated synthetic immunofluorescence (IF) images from brightfield (BF) images using generative AI algorithms in the literature. Therefore, in this study, we benchmark and compare five models from three types of IF generation backbones, CNN, GAN, and diffusion models, using a publicly available dataset. This paper not only serves as a comparative study to determine the best-performing model but also proposes a comprehensive analysis pipeline for evaluating the efficacy of generators in IF image synthesis. We highlighted the potential of deep learning-based generators for IF image synthesis, while also discussed potential issues and future research directions. 
Although generative AI shows promise in simplifying cell phenotyping using only BF images with IF staining, further research and validations are needed to address the key challenges of model generalisability, batch effects, feature relevance and computational costs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.09507v2-abstract-full').style.display = 'none'; document.getElementById('2407.09507v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.05259">arXiv:2407.05259</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.05259">pdf</a>, <a href="https://arxiv.org/format/2407.05259">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Multi-scale Conditional Generative Modeling for Microscopic Image Restoration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Huang%2C+L">Luzhe Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+X">Xiongye Xiao</a>, <a 
href="/search/eess?searchtype=author&amp;query=Li%2C+S">Shixuan Li</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+J">Jiawen Sun</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+Y">Yi Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Ozcan%2C+A">Aydogan Ozcan</a>, <a href="/search/eess?searchtype=author&amp;query=Bogdan%2C+P">Paul Bogdan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.05259v1-abstract-short" style="display: inline;"> The advance of diffusion-based generative models in recent years has revolutionized state-of-the-art (SOTA) techniques in a wide variety of image analysis and synthesis tasks, whereas their adaptation on image restoration, particularly within computational microscopy remains theoretically and empirically underexplored. In this research, we introduce a multi-scale generative model that enhances con&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.05259v1-abstract-full').style.display = 'inline'; document.getElementById('2407.05259v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.05259v1-abstract-full" style="display: none;"> The advance of diffusion-based generative models in recent years has revolutionized state-of-the-art (SOTA) techniques in a wide variety of image analysis and synthesis tasks, whereas their adaptation on image restoration, particularly within computational microscopy remains theoretically and empirically underexplored. In this research, we introduce a multi-scale generative model that enhances conditional image restoration through a novel exploitation of the Brownian Bridge process within wavelet domain. 
By initiating the Brownian Bridge diffusion process specifically at the lowest-frequency subband and applying generative adversarial networks at subsequent multi-scale high-frequency subbands in the wavelet domain, our method provides significant acceleration during training and sampling while sustaining a high image generation quality and diversity on par with SOTA diffusion models. Experimental results on various computational microscopy and imaging tasks confirm our method&#39;s robust performance and its considerable reduction in its sampling steps and time. This pioneering technique offers an efficient image restoration framework that harmonizes efficiency with quality, signifying a major stride in incorporating cutting-edge generative models into computational microscopy workflows. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.05259v1-abstract-full').style.display = 'none'; document.getElementById('2407.05259v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.01083">arXiv:2407.01083</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.01083">pdf</a>, <a href="https://arxiv.org/ps/2407.01083">ps</a>, <a href="https://arxiv.org/format/2407.01083">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> A Note On the Clark Conjecture On Time-Warped Bandlimited Signals </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xiang-Gen Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.01083v1-abstract-short" style="display: inline;"> In this note, a result of a previous paper on the Clark conjecture on time-warped bandlimited signals is extended to a more general class of the time warping functions, which includes most of the common functions in practice. </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.01083v1-abstract-full" style="display: none;"> In this note, a result of a previous paper on the Clark conjecture on time-warped bandlimited signals is extended to a more general class of the time warping functions, which includes most of the common functions in practice. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.01083v1-abstract-full').style.display = 'none'; document.getElementById('2407.01083v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.09822">arXiv:2406.09822</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.09822">pdf</a>, <a href="https://arxiv.org/format/2406.09822">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> An I2I Inpainting Approach for Efficient Channel Knowledge Map Construction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Jin%2C+Z">Zhenzhou Jin</a>, <a href="/search/eess?searchtype=author&amp;query=You%2C+L">Li You</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+J">Jue Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xiang-Gen Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Gao%2C+X">Xiqi Gao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.09822v1-abstract-short" style="display: inline;"> Channel knowledge map (CKM) has received widespread attention as an emerging enabling technology for environment-aware wireless communications. It involves the construction of databases containing location-specific channel knowledge, which are then leveraged to facilitate channel state information (CSI) acquisition and transceiver design. In this context, a fundamental challenge lies in efficientl&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.09822v1-abstract-full').style.display = 'inline'; document.getElementById('2406.09822v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.09822v1-abstract-full" style="display: none;"> Channel knowledge map (CKM) has received widespread attention as an emerging enabling technology for environment-aware wireless communications. It involves the construction of databases containing location-specific channel knowledge, which are then leveraged to facilitate channel state information (CSI) acquisition and transceiver design. In this context, a fundamental challenge lies in efficiently constructing the CKM based on a given wireless propagation environment. Most existing methods are based on stochastic modeling and sequence prediction, which do not fully exploit the inherent physical characteristics of the propagation environment, resulting in low accuracy and high computational complexity. To address these limitations, we propose a Laplacian pyramid (LP)-based CKM construction scheme to predict the channel knowledge at arbitrary locations in a targeted area. 
Specifically, we first view the channel knowledge as a 2-D image and transform the CKM construction problem into an image-to-image (I2I) inpainting task, which predicts the channel knowledge at a specific location by recovering the corresponding pixel value in the image matrix. Then, inspired by the reversible and closed-form structure of the LP, we show its natural suitability for our task in designing a fast I2I mapping network. For different frequency components of LP decomposition, we design tailored networks accordingly. Besides, to encode the global structural information of the propagation environment, we introduce self-attention and cross-covariance attention mechanisms in different layers, respectively. Finally, experimental results show that the proposed scheme outperforms the benchmark, achieving higher reconstruction accuracy while with lower computational complexity. Moreover, the proposed approach has a strong generalization ability and can be implemented in different wireless communication scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.09822v1-abstract-full').style.display = 'none'; document.getElementById('2406.09822v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 11 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.07498">arXiv:2406.07498</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.07498">pdf</a>, <a href="https://arxiv.org/format/2406.07498">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> RaD-Net 2: A causal two-stage repairing and denoising speech enhancement network with knowledge distillation and complex axial self-attention </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Liu%2C+M">Mingshuai Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+Z">Zhuangqi Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Yan%2C+X">Xiaopeng Yan</a>, <a href="/search/eess?searchtype=author&amp;query=Lv%2C+Y">Yuanjun Lv</a>, <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xianjun Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+C">Chuanzeng Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+Y">Yijian Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Xie%2C+L">Lei Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.07498v1-abstract-short" style="display: inline;"> In real-time speech communication systems, speech signals are often degraded by multiple distortions. 
Recently, a two-stage Repair-and-Denoising network (RaD-Net) was proposed with superior speech quality improvement in the ICASSP 2024 Speech Signal Improvement (SSI) Challenge. However, failure to use future information and constraint receptive field of convolution layers limit the system&#39;s perfor&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.07498v1-abstract-full').style.display = 'inline'; document.getElementById('2406.07498v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.07498v1-abstract-full" style="display: none;"> In real-time speech communication systems, speech signals are often degraded by multiple distortions. Recently, a two-stage Repair-and-Denoising network (RaD-Net) was proposed with superior speech quality improvement in the ICASSP 2024 Speech Signal Improvement (SSI) Challenge. However, failure to use future information and constraint receptive field of convolution layers limit the system&#39;s performance. To mitigate these problems, we extend RaD-Net to its upgraded version, RaD-Net 2. Specifically, a causality-based knowledge distillation is introduced in the first stage to use future information in a causal way. We use the non-causal repairing network as the teacher to improve the performance of the causal repairing network. In addition, in the second stage, complex axial self-attention is applied in the denoising network&#39;s complex feature encoder/decoder. Experimental results on the ICASSP 2024 SSI Challenge blind test set show that RaD-Net 2 brings 0.10 OVRL DNSMOS improvement compared to RaD-Net. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.07498v1-abstract-full').style.display = 'none'; document.getElementById('2406.07498v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by Interspeech 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.05961">arXiv:2406.05961</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.05961">pdf</a>, <a href="https://arxiv.org/format/2406.05961">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> BS-PLCNet 2: Two-stage Band-split Packet Loss Concealment Network with Intra-model Knowledge Distillation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Z">Zihan Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xianjun Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+C">Chuanzeng Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+Y">Yijian Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Xie%2C+L">Lei Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.05961v1-abstract-short" style="display: inline;"> Audio packet loss is an inevitable problem in real-time speech 
communication. A band-split packet loss concealment network (BS-PLCNet) targeting full-band signals was recently proposed. Although it performs superiorly in the ICASSP 2024 PLC Challenge, BS-PLCNet is a large model with high computational complexity of 8.95G FLOPS. This paper presents its updated version, BS-PLCNet 2, to reduce comput&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.05961v1-abstract-full').style.display = 'inline'; document.getElementById('2406.05961v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.05961v1-abstract-full" style="display: none;"> Audio packet loss is an inevitable problem in real-time speech communication. A band-split packet loss concealment network (BS-PLCNet) targeting full-band signals was recently proposed. Although it performs superiorly in the ICASSP 2024 PLC Challenge, BS-PLCNet is a large model with high computational complexity of 8.95G FLOPS. This paper presents its updated version, BS-PLCNet 2, to reduce computational complexity and improve performance further. Specifically, to compensate for the missing future information, in the wide-band module, we design a dual-path encoder structure (with non-causal and causal path) and leverage an intra-model knowledge distillation strategy to distill the future information from the non-causal teacher to the casual student. Moreover, we introduce a lightweight post-processing module after packet loss restoration to recover speech distortions and remove residual noise in the audio signal. With only 40% of original parameters in BS-PLCNet, BS-PLCNet 2 brings 0.18 PLCMOS improvement on the ICASSP 2024 PLC challenge blind set, achieving state-of-the-art performance on this dataset. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.05961v1-abstract-full').style.display = 'none'; document.getElementById('2406.05961v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by Interspeech 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.04586">arXiv:2406.04586</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.04586">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> A Simple Channel Independent Beamforming Scheme With Parallel Uniform Circular Array </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Jing%2C+H">Haiyue Jing</a>, <a href="/search/eess?searchtype=author&amp;query=Cheng%2C+W">Wenchi Cheng</a>, <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xiang-Gen Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.04586v1-abstract-short" style="display: inline;"> In this letter, we consider a uniform circular array (UCA)-based line-of-sight multiple-input-multiple-output system, where the transmit and receive UCAs are parallel but non-coaxial with each other. 
We propose a simple channel-independent beamforming scheme with fast symbol-wise maximum likelihood detection. </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.04586v1-abstract-full" style="display: none;"> In this letter, we consider a uniform circular array (UCA)-based line-of-sight multiple-input-multiple-output system, where the transmit and receive UCAs are parallel but non-coaxial with each other. We propose a simple channel-independent beamforming scheme with fast symbol-wise maximum likelihood detection. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.04586v1-abstract-full').style.display = 'none'; document.getElementById('2406.04586v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been published in IEEE Communications Letters. 
arXiv admin note: substantial text overlap with arXiv:1804.06621</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.11883">arXiv:2405.11883</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.11883">pdf</a>, <a href="https://arxiv.org/format/2405.11883">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Asynchronous MIMO-OFDM Massive Unsourced Random Access with Codeword Collisions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Li%2C+T">Tianya Li</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+Y">Yongpeng Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Gao%2C+J">Junyuan Gao</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+W">Wenjun Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xiang-Gen Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Ng%2C+D+W+K">Derrick Wing Kwan Ng</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+C">Chengshan Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.11883v2-abstract-short" style="display: inline;"> This paper investigates asynchronous multiple-input multiple-output (MIMO) massive unsourced random access (URA) in an orthogonal frequency division multiplexing (OFDM) system over frequency-selective fading channels, with the presence of both timing and carrier frequency offsets (TO and CFO) and non-negligible codeword collisions. 
The proposed coding framework segregates the data into two compone&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.11883v2-abstract-full').style.display = 'inline'; document.getElementById('2405.11883v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.11883v2-abstract-full" style="display: none;"> This paper investigates asynchronous multiple-input multiple-output (MIMO) massive unsourced random access (URA) in an orthogonal frequency division multiplexing (OFDM) system over frequency-selective fading channels, with the presence of both timing and carrier frequency offsets (TO and CFO) and non-negligible codeword collisions. The proposed coding framework segregates the data into two components, namely, preamble and coding parts, with the former being tree-coded and the latter LDPC-coded. By leveraging the dual sparsity of the equivalent channel across both codeword and delay domains (CD and DD), we develop a message-passing-based sparse Bayesian learning algorithm, combined with belief propagation and mean field, to iteratively estimate DD channel responses, TO, and delay profiles. Furthermore, by jointly leveraging the observations among multiple slots, we establish a novel graph-based algorithm to iteratively separate the superimposed channels and compensate for the phase rotations. Additionally, the proposed algorithm is applied to the flat fading scenario to estimate both TO and CFO, where the channel and offset estimation is enhanced by leveraging the geometric characteristics of the signal constellation. Extensive simulations reveal that the proposed algorithm achieves superior performance and substantial complexity reduction in both channel and offset estimation compared to the codebook enlarging-based counterparts, and enhanced data recovery performances compared to state-of-the-art URA schemes. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.11883v2-abstract-full').style.display = 'none'; document.getElementById('2405.11883v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been accepted by the IEEE Transactions on Wireless Communications</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.09734">arXiv:2404.09734</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.09734">pdf</a>, <a href="https://arxiv.org/format/2404.09734">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Weighted Sum-Rate Maximization for Movable Antenna-Enhanced Wireless Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Feng%2C+B">Biqian Feng</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+Y">Yongpeng Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xiang-Gen Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+C">Chengshan Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short 
has-text-grey-dark mathjax" id="2404.09734v1-abstract-short" style="display: inline;"> This letter investigates the weighted sum rate maximization problem in movable antenna (MA)-enhanced systems. To reduce the computational complexity, we transform it into a more tractable weighted minimum mean square error (WMMSE) problem well-suited for MA. We then adopt the WMMSE algorithm and majorization-minimization algorithm to optimize the beamforming and antenna positions, respectively. Mo&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.09734v1-abstract-full').style.display = 'inline'; document.getElementById('2404.09734v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.09734v1-abstract-full" style="display: none;"> This letter investigates the weighted sum rate maximization problem in movable antenna (MA)-enhanced systems. To reduce the computational complexity, we transform it into a more tractable weighted minimum mean square error (WMMSE) problem well-suited for MA. We then adopt the WMMSE algorithm and majorization-minimization algorithm to optimize the beamforming and antenna positions, respectively. Moreover, we propose a planar movement mode, which constrains each MA to a specified area, we obtain a low-complexity closed-form solution. Numerical results demonstrate that the MA-enhanced system outperforms the conventional system. Besides, the computation time for the planar movement mode is reduced by approximately 30\% at a little performance expense. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.09734v1-abstract-full').style.display = 'none'; document.getElementById('2404.09734v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by IEEE Wireless Communications Letters</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.07425">arXiv:2404.07425</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.07425">pdf</a>, <a href="https://arxiv.org/ps/2404.07425">ps</a>, <a href="https://arxiv.org/format/2404.07425">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Precoder Design for User-Centric Network Massive MIMO with Matrix Manifold Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Sun%2C+R">Rui Sun</a>, <a href="/search/eess?searchtype=author&amp;query=You%2C+L">Li You</a>, <a href="/search/eess?searchtype=author&amp;query=Lu%2C+A">An-An Lu</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+C">Chen Sun</a>, <a href="/search/eess?searchtype=author&amp;query=Gao%2C+X">Xiqi Gao</a>, <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xiang-Gen Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.07425v1-abstract-short" style="display: inline;"> In this paper, we investigate the precoder design for user-centric network (UCN) massive multiple-input multiple-output (mMIMO) downlink with matrix manifold optimization. In UCN mMIMO systems, each user terminal (UT) is served by a subset of base stations (BSs) instead of all the BSs, facilitating the implementation of the system and lowering the dimension of the precoders to be designed. By prov&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.07425v1-abstract-full').style.display = 'inline'; document.getElementById('2404.07425v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.07425v1-abstract-full" style="display: none;"> In this paper, we investigate the precoder design for user-centric network (UCN) massive multiple-input multiple-output (mMIMO) downlink with matrix manifold optimization. In UCN mMIMO systems, each user terminal (UT) is served by a subset of base stations (BSs) instead of all the BSs, facilitating the implementation of the system and lowering the dimension of the precoders to be designed. By proving that the precoder set satisfying the per-BS power constraints forms a Riemannian submanifold of a linear product manifold, we transform the constrained precoder design problem in Euclidean space to an unconstrained one on the Riemannian submanifold. Riemannian ingredients, including orthogonal projection, Riemannian gradient, retraction and vector transport, of the problem on the Riemannian submanifold are further derived, with which the Riemannian conjugate gradient (RCG) design method is proposed for solving the unconstrained problem. The proposed method avoids the inverses of large dimensional matrices, which is beneficial in practice. 
The complexity analyses show the high computational efficiency of RCG precoder design. Simulation results demonstrate the numerical superiority of the proposed precoder design and the high efficiency of the UCN mMIMO system. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.07425v1-abstract-full').style.display = 'none'; document.getElementById('2404.07425v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 9 figures, journal</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.07954">arXiv:2403.07954</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.07954">pdf</a>, <a href="https://arxiv.org/format/2403.07954">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Optimizing Polynomial Graph Filters: A Novel Adaptive Krylov Subspace Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Huang%2C+K">Keke Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Cao%2C+W">Wencai Cao</a>, <a href="/search/eess?searchtype=author&amp;query=Ta%2C+H">Hoang Ta</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+X">Xiaokui Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Li%C3%B2%2C+P">Pietro Liò</a> 
</p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.07954v2-abstract-short" style="display: inline;"> Graph Neural Networks (GNNs), known as spectral graph filters, find a wide range of applications in web networks. To bypass eigendecomposition, polynomial graph filters are proposed to approximate graph filters by leveraging various polynomial bases for filter training. However, no existing studies have explored the diverse polynomial graph filters from a unified perspective for optimization. In&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.07954v2-abstract-full').style.display = 'inline'; document.getElementById('2403.07954v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.07954v2-abstract-full" style="display: none;"> Graph Neural Networks (GNNs), known as spectral graph filters, find a wide range of applications in web networks. To bypass eigendecomposition, polynomial graph filters are proposed to approximate graph filters by leveraging various polynomial bases for filter training. However, no existing studies have explored the diverse polynomial graph filters from a unified perspective for optimization. In this paper, we first unify polynomial graph filters, as well as the optimal filters of identical degrees into the Krylov subspace of the same order, thus providing equivalent expressive power theoretically. Next, we investigate the asymptotic convergence property of polynomials from the unified Krylov subspace perspective, revealing their limited adaptability in graphs with varying heterophily degrees. Inspired by those facts, we design a novel adaptive Krylov subspace approach to optimize polynomial bases with provable controllability over the graph spectrum so as to adapt various heterophily graphs. 
Subsequently, we propose AdaptKry, an optimized polynomial graph filter utilizing bases from the adaptive Krylov subspaces. Meanwhile, in light of the diverse spectral properties of complex graphs, we extend AdaptKry by leveraging multiple adaptive Krylov bases without incurring extra training costs. As a consequence, extended AdaptKry is able to capture the intricate characteristics of graphs and provide insights into their inherent complexity. We conduct extensive experiments across a series of real-world datasets. The experimental results demonstrate the superior filtering capability of AdaptKry, as well as the optimized efficacy of the adaptive Krylov basis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.07954v2-abstract-full').style.display = 'none'; document.getElementById('2403.07954v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.01271">arXiv:2402.01271</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.01271">pdf</a>, <a href="https://arxiv.org/format/2402.01271">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> An Intra-BRNN and GB-RVQ Based END-TO-END Neural Audio Codec </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xu%2C+L">Linping Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+J">Jiawei Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+D">Dejun Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xianjun Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+L">Li Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+Y">Yijian Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+P">Piao Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Song%2C+S">Shenyi Song</a>, <a href="/search/eess?searchtype=author&amp;query=Yin%2C+S">Sixing Yin</a>, <a href="/search/eess?searchtype=author&amp;query=Sohel%2C+F">Ferdous Sohel</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.01271v1-abstract-short" style="display: inline;"> Recently, neural networks have proven to be effective in performing speech coding task at low bitrates. However, under-utilization of intra-frame correlations and the error of quantizer specifically degrade the reconstructed audio quality. 
To improve the coding quality, we present an end-to-end neural speech codec, namely CBRC (Convolutional and Bidirectional Recurrent neural Codec). An interleave&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.01271v1-abstract-full').style.display = 'inline'; document.getElementById('2402.01271v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.01271v1-abstract-full" style="display: none;"> Recently, neural networks have proven to be effective in performing speech coding task at low bitrates. However, under-utilization of intra-frame correlations and the error of quantizer specifically degrade the reconstructed audio quality. To improve the coding quality, we present an end-to-end neural speech codec, namely CBRC (Convolutional and Bidirectional Recurrent neural Codec). An interleaved structure using 1D-CNN and Intra-BRNN is designed to exploit the intra-frame correlations more efficiently. Furthermore, Group-wise and Beam-search Residual Vector Quantizer (GB-RVQ) is used to reduce the quantization noise. CBRC encodes audio every 20ms with no additional latency, which is suitable for real-time communication. Experimental results demonstrate the superiority of the proposed codec when comparing CBRC at 3kbps with Opus at 12kbps. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.01271v1-abstract-full').style.display = 'none'; document.getElementById('2402.01271v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">INTERSPEECH 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.13197">arXiv:2401.13197</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.13197">pdf</a>, <a href="https://arxiv.org/format/2401.13197">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Predicting Mitral Valve mTEER Surgery Outcomes Using Machine Learning and Deep Learning Techniques </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Vyas%2C+T">Tejas Vyas</a>, <a href="/search/eess?searchtype=author&amp;query=Chowdhury%2C+M">Mohsena Chowdhury</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+X">Xiaojiao Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Claeys%2C+M">Mathias Claeys</a>, <a href="/search/eess?searchtype=author&amp;query=Ong%2C+G">Géraldine Ong</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+G">Guanghui Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.13197v1-abstract-short" style="display: inline;"> Mitral Transcatheter Edge-to-Edge Repair (mTEER) is a medical procedure utilized for the treatment of mitral valve disorders. However, predicting the outcome of the procedure poses a significant challenge. 
This paper makes the first attempt to harness classical machine learning (ML) and deep learning (DL) techniques for predicting mitral valve mTEER surgery outcomes. To achieve this, we compiled a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.13197v1-abstract-full').style.display = 'inline'; document.getElementById('2401.13197v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.13197v1-abstract-full" style="display: none;"> Mitral Transcatheter Edge-to-Edge Repair (mTEER) is a medical procedure utilized for the treatment of mitral valve disorders. However, predicting the outcome of the procedure poses a significant challenge. This paper makes the first attempt to harness classical machine learning (ML) and deep learning (DL) techniques for predicting mitral valve mTEER surgery outcomes. To achieve this, we compiled a dataset from 467 patients, encompassing labeled echocardiogram videos and patient reports containing Transesophageal Echocardiography (TEE) measurements detailing Mitral Valve Repair (MVR) treatment outcomes. Leveraging this dataset, we conducted a benchmark evaluation of six ML algorithms and two DL models. The results underscore the potential of ML and DL in predicting mTEER surgery outcomes, providing insight for future investigation and advancements in this domain. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.13197v1-abstract-full').style.display = 'none'; document.getElementById('2401.13197v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 1 figure</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.08887">arXiv:2401.08887</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.08887">pdf</a>, <a href="https://arxiv.org/ps/2401.08887">ps</a>, <a href="https://arxiv.org/format/2401.08887">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> NOTSOFAR-1 Challenge: New Datasets, Baseline, and Tasks for Distant Meeting Transcription </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Vinnikov%2C+A">Alon Vinnikov</a>, <a href="/search/eess?searchtype=author&amp;query=Ivry%2C+A">Amir Ivry</a>, <a href="/search/eess?searchtype=author&amp;query=Hurvitz%2C+A">Aviv Hurvitz</a>, <a href="/search/eess?searchtype=author&amp;query=Abramovski%2C+I">Igor Abramovski</a>, <a href="/search/eess?searchtype=author&amp;query=Koubi%2C+S">Sharon Koubi</a>, <a href="/search/eess?searchtype=author&amp;query=Gurvich%2C+I">Ilya Gurvich</a>, <a href="/search/eess?searchtype=author&amp;query=Pe%60er%2C+S">Shai Pe`er</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+X">Xiong Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Elizalde%2C+B+M">Benjamin Martinez Elizalde</a>, <a 
href="/search/eess?searchtype=author&amp;query=Kanda%2C+N">Naoyuki Kanda</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+X">Xiaofei Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Shaer%2C+S">Shalev Shaer</a>, <a href="/search/eess?searchtype=author&amp;query=Yagev%2C+S">Stav Yagev</a>, <a href="/search/eess?searchtype=author&amp;query=Asher%2C+Y">Yossi Asher</a>, <a href="/search/eess?searchtype=author&amp;query=Sivasankaran%2C+S">Sunit Sivasankaran</a>, <a href="/search/eess?searchtype=author&amp;query=Gong%2C+Y">Yifan Gong</a>, <a href="/search/eess?searchtype=author&amp;query=Tang%2C+M">Min Tang</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+H">Huaming Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Krupka%2C+E">Eyal Krupka</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.08887v1-abstract-short" style="display: inline;"> We introduce the first Natural Office Talkers in Settings of Far-field Audio Recordings (``NOTSOFAR-1&#39;&#39;) Challenge alongside datasets and baseline system. The challenge focuses on distant speaker diarization and automatic speech recognition (DASR) in far-field meeting scenarios, with single-channel and known-geometry multi-channel tracks, and serves as a launch platform for two new datasets: First&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.08887v1-abstract-full').style.display = 'inline'; document.getElementById('2401.08887v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.08887v1-abstract-full" style="display: none;"> We introduce the first Natural Office Talkers in Settings of Far-field Audio Recordings (``NOTSOFAR-1&#39;&#39;) Challenge alongside datasets and baseline system. 
The challenge focuses on distant speaker diarization and automatic speech recognition (DASR) in far-field meeting scenarios, with single-channel and known-geometry multi-channel tracks, and serves as a launch platform for two new datasets: First, a benchmarking dataset of 315 meetings, averaging 6 minutes each, capturing a broad spectrum of real-world acoustic conditions and conversational dynamics. It is recorded across 30 conference rooms, featuring 4-8 attendees and a total of 35 unique speakers. Second, a 1000-hour simulated training dataset, synthesized with enhanced authenticity for real-world generalization, incorporating 15,000 real acoustic transfer functions. The tasks focus on single-device DASR, where multi-channel devices always share the same known geometry. This is aligned with common setups in actual conference rooms, and avoids technical complexities associated with multi-device tasks. It also allows for the development of geometry-specific solutions. The NOTSOFAR-1 Challenge aims to advance research in the field of distant conversational speech recognition, providing key resources to unlock the potential of data-driven methods, which we believe are currently constrained by the absence of comprehensive high-quality training and benchmarking datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.08887v1-abstract-full').style.display = 'none'; document.getElementById('2401.08887v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.04389">arXiv:2401.04389</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.04389">pdf</a>, <a href="https://arxiv.org/format/2401.04389">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> RaD-Net: A Repairing and Denoising Network for Speech Signal Improvement </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Liu%2C+M">Mingshuai Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+Z">Zhuangqi Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Yan%2C+X">Xiaopeng Yan</a>, <a href="/search/eess?searchtype=author&amp;query=Lv%2C+Y">Yuanjun Lv</a>, <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xianjun Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+C">Chuanzeng Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+Y">Yijian Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Xie%2C+L">Lei Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.04389v1-abstract-short" style="display: inline;"> This paper introduces our repairing and denoising network (RaD-Net) for the ICASSP 2024 Speech Signal Improvement (SSI) Challenge. We extend our previous framework based on a two-stage network and propose an upgraded model. 
Specifically, we replace the repairing network with COM-Net from TEA-PSE. In addition, multi-resolution discriminators and multi-band discriminators are adopted in the training&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.04389v1-abstract-full').style.display = 'inline'; document.getElementById('2401.04389v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.04389v1-abstract-full" style="display: none;"> This paper introduces our repairing and denoising network (RaD-Net) for the ICASSP 2024 Speech Signal Improvement (SSI) Challenge. We extend our previous framework based on a two-stage network and propose an upgraded model. Specifically, we replace the repairing network with COM-Net from TEA-PSE. In addition, multi-resolution discriminators and multi-band discriminators are adopted in the training stage. Finally, we use a three-step training strategy to optimize our model. We submit two models with different sets of parameters to meet the RTF requirement of the two tracks. According to the official results, the proposed systems rank 2nd in track 1 and 3rd in track 2. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.04389v1-abstract-full').style.display = 'none'; document.getElementById('2401.04389v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">submitted to ICASSP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.03687">arXiv:2401.03687</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.03687">pdf</a>, <a href="https://arxiv.org/format/2401.03687">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> BS-PLCNet: Band-split Packet Loss Concealment Network with Multi-task Learning Framework and Multi-discriminators </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Z">Zihan Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+J">Jiayao Sun</a>, <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xianjun Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+C">Chuanzeng Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+Y">Yijian Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Xie%2C+L">Lei Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.03687v1-abstract-short" style="display: inline;"> Packet loss is a common and unavoidable problem in voice over internet phone (VoIP) systems. To deal with the problem, we propose a band-split packet loss concealment network (BS-PLCNet). Specifically, we split the full-band signal into wide-band (0-8kHz) and high-band (8-24kHz). 
The wide-band signals are processed by a gated convolutional recurrent network (GCRN), while the high-band counterpart&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.03687v1-abstract-full').style.display = 'inline'; document.getElementById('2401.03687v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.03687v1-abstract-full" style="display: none;"> Packet loss is a common and unavoidable problem in voice over internet phone (VoIP) systems. To deal with the problem, we propose a band-split packet loss concealment network (BS-PLCNet). Specifically, we split the full-band signal into wide-band (0-8kHz) and high-band (8-24kHz). The wide-band signals are processed by a gated convolutional recurrent network (GCRN), while the high-band counterpart is processed by a simple GRU network. To ensure high speech quality and automatic speech recognition (ASR) compatibility, multi-task learning (MTL) framework including fundamental frequency (f0) prediction, linguistic awareness, and multi-discriminators are used. The proposed approach tied for 1st place in the ICASSP 2024 PLC Challenge. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.03687v1-abstract-full').style.display = 'none'; document.getElementById('2401.03687v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">submitted to ICASSP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.00413">arXiv:2401.00413</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.00413">pdf</a>, <a href="https://arxiv.org/format/2401.00413">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Real-Time FJ/MAC PDE Solvers via Tensorized, Back-Propagation-Free Optical PINN Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhao%2C+Y">Yequan Zhao</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+X">Xian Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Yu%2C+X">Xinling Yu</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+Z">Ziyue Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+Z">Zhixiong Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Kurczveil%2C+G">Geza Kurczveil</a>, <a href="/search/eess?searchtype=author&amp;query=Beausoleil%2C+R+G">Raymond G. 
Beausoleil</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Z">Zheng Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.00413v2-abstract-short" style="display: inline;"> Solving partial differential equations (PDEs) numerically often requires huge computing time, energy cost, and hardware resources in practical applications. This has limited their applications in many scenarios (e.g., autonomous systems, supersonic flows) that have a limited energy budget and require near real-time response. Leveraging optical computing, this paper develops an on-chip training fra&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.00413v2-abstract-full').style.display = 'inline'; document.getElementById('2401.00413v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.00413v2-abstract-full" style="display: none;"> Solving partial differential equations (PDEs) numerically often requires huge computing time, energy cost, and hardware resources in practical applications. This has limited their applications in many scenarios (e.g., autonomous systems, supersonic flows) that have a limited energy budget and require near real-time response. Leveraging optical computing, this paper develops an on-chip training framework for physics-informed neural networks (PINNs), aiming to solve high-dimensional PDEs with fJ/MAC photonic power consumption and ultra-low latency. Despite the ultra-high speed of optical neural networks, training a PINN on an optical chip is hard due to (1) the large size of photonic devices, and (2) the lack of scalable optical memory devices to store the intermediate results of back-propagation (BP). To enable realistic optical PINN training, this paper presents a scalable method to avoid the BP process. 
We also employ a tensor-compressed approach to improve the convergence and scalability of our optical PINN training. This training framework is designed with tensorized optical neural networks (TONN) for scalable inference acceleration and MZI phase-domain tuning for \textit{in-situ} optimization. Our simulation results of a 20-dim HJB PDE show that our photonic accelerator can reduce the number of MZIs by a factor of $1.17\times 10^3$, with only $1.36$ J and $1.15$ s to solve this equation. This is the first real-size optical PINN training framework that can be applied to solve high-dimensional PDEs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.00413v2-abstract-full').style.display = 'none'; document.getElementById('2401.00413v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ML with New Compute Paradigms (MLNCP) at NeurIPS 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.13311">arXiv:2312.13311</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2312.13311">pdf</a>, <a href="https://arxiv.org/format/2312.13311">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Unlocking Deep Learning: A BP-Free Approach for Parallel Block-Wise Training of Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Cheng%2C+A">Anzhe Cheng</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Z">Zhenkun Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Yin%2C+C">Chenzhong Yin</a>, <a href="/search/eess?searchtype=author&amp;query=Cheng%2C+M">Mingxi Cheng</a>, <a href="/search/eess?searchtype=author&amp;query=Ping%2C+H">Heng Ping</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+X">Xiongye Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Nazarian%2C+S">Shahin Nazarian</a>, <a href="/search/eess?searchtype=author&amp;query=Bogdan%2C+P">Paul Bogdan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.13311v1-abstract-short" style="display: inline;"> Backpropagation (BP) has been a successful optimization technique for deep learning models. 
However, its limitations, such as backward- and update-locking, and its biological implausibility, hinder the concurrent updating of layers and do not mimic the local learning processes observed in the human brain. To address these issues, recent research has suggested using local error signals to asynchron&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.13311v1-abstract-full').style.display = 'inline'; document.getElementById('2312.13311v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.13311v1-abstract-full" style="display: none;"> Backpropagation (BP) has been a successful optimization technique for deep learning models. However, its limitations, such as backward- and update-locking, and its biological implausibility, hinder the concurrent updating of layers and do not mimic the local learning processes observed in the human brain. To address these issues, recent research has suggested using local error signals to asynchronously train network blocks. However, this approach often involves extensive trial-and-error iterations to determine the best configuration for local training. This includes decisions on how to decouple network blocks and which auxiliary networks to use for each block. In our work, we introduce a novel BP-free approach: a block-wise BP-free (BWBPF) neural network that leverages local error signals to optimize distinct sub-neural networks separately, where the global loss is only responsible for updating the output layer. The local error signals used in the BP-free model can be computed in parallel, enabling a potential speed-up in the weight update process through parallel implementation. 
Our experimental results consistently show that this approach can identify transferable decoupled architectures for VGG and ResNet variations, outperforming models trained with end-to-end backpropagation and other state-of-the-art block-wise learning techniques on datasets such as CIFAR-10 and Tiny-ImageNet. The code is released at https://github.com/Belis0811/BWBPF. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.13311v1-abstract-full').style.display = 'none'; document.getElementById('2312.13311v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The paper has been accepted by ICASSP2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.06969">arXiv:2312.06969</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2312.06969">pdf</a>, <a href="https://arxiv.org/ps/2312.06969">ps</a>, <a href="https://arxiv.org/format/2312.06969">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Channel Estimation for Movable Antenna Communication Systems: A Framework Based on Compressed Sensing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+Z">Zhenyu Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Cao%2C+S">Songqi 
Cao</a>, <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+L">Lipeng Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+Y">Yanming Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xiang-Gen Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+R">Rui Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.06969v1-abstract-short" style="display: inline;"> Movable antenna (MA) is a new technology with great potential to improve communication performance by enabling local movement of antennas for pursuing better channel conditions. In particular, the acquisition of complete channel state information (CSI) between the transmitter (Tx) and receiver (Rx) regions is an essential problem for MA systems to reap performance gains. In this paper, we propose&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.06969v1-abstract-full').style.display = 'inline'; document.getElementById('2312.06969v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.06969v1-abstract-full" style="display: none;"> Movable antenna (MA) is a new technology with great potential to improve communication performance by enabling local movement of antennas for pursuing better channel conditions. In particular, the acquisition of complete channel state information (CSI) between the transmitter (Tx) and receiver (Rx) regions is an essential problem for MA systems to reap performance gains. In this paper, we propose a general channel estimation framework for MA systems by exploiting the multi-path field response channel structure. 
Specifically, the angles of departure (AoDs), angles of arrival (AoAs), and complex coefficients of the multi-path components (MPCs) are jointly estimated by employing the compressed sensing method, based on multiple channel measurements at designated positions of the Tx-MA and Rx-MA. Under this framework, the Tx-MA and Rx-MA measurement positions fundamentally determine the measurement matrix for compressed sensing, of which the mutual coherence is analyzed from the perspective of Fourier transform. Moreover, two criteria for MA measurement positions are provided to guarantee the successful recovery of MPCs. Then, we propose several MA measurement position setups and compare their performance. Finally, comprehensive simulation results show that the proposed framework is able to estimate the complete CSI between the Tx and Rx regions with a high accuracy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.06969v1-abstract-full').style.display = 'none'; document.getElementById('2312.06969v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.16565">arXiv:2311.16565</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.16565">pdf</a>, <a href="https://arxiv.org/format/2311.16565">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> DiffusionTalker: Personalization and Acceleration for Speech-Driven 3D Face Diffuser </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Chen%2C+P">Peng Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Wei%2C+X">Xiaobao Wei</a>, <a href="/search/eess?searchtype=author&amp;query=Lu%2C+M">Ming Lu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+Y">Yitong Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Yao%2C+N">Naiming Yao</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+X">Xingyu Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+H">Hui Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.16565v2-abstract-short" style="display: inline;"> Speech-driven 3D facial animation has been an attractive task in both academia and industry. Traditional methods mostly focus on learning a deterministic mapping from speech to animation. Recent approaches start to consider the non-deterministic fact of speech-driven 3D face animation and employ the diffusion model for the task. 
However, personalizing facial animation and accelerating animation ge&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.16565v2-abstract-full').style.display = 'inline'; document.getElementById('2311.16565v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.16565v2-abstract-full" style="display: none;"> Speech-driven 3D facial animation has been an attractive task in both academia and industry. Traditional methods mostly focus on learning a deterministic mapping from speech to animation. Recent approaches start to consider the non-deterministic fact of speech-driven 3D face animation and employ the diffusion model for the task. However, personalizing facial animation and accelerating animation generation are still two major limitations of existing diffusion-based methods. To address the above limitations, we propose DiffusionTalker, a diffusion-based method that utilizes contrastive learning to personalize 3D facial animation and knowledge distillation to accelerate 3D animation generation. Specifically, to enable personalization, we introduce a learnable talking identity to aggregate knowledge in audio sequences. The proposed identity embeddings extract customized facial cues across different people in a contrastive learning manner. During inference, users can obtain personalized facial animation based on input audio, reflecting a specific talking style. With a trained diffusion model with hundreds of steps, we distill it into a lightweight model with 8 steps for acceleration. Extensive experiments are conducted to demonstrate that our method outperforms state-of-the-art methods. The code will be released. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.16565v2-abstract-full').style.display = 'none'; document.getElementById('2311.16565v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.11804">arXiv:2311.11804</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.11804">pdf</a>, <a href="https://arxiv.org/ps/2311.11804">ps</a>, <a href="https://arxiv.org/format/2311.11804">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Robust Multidimensional Chinese Remainder Theorem for Integer Vector Reconstruction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+L">Li Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Huo%2C+H">Haiye Huo</a>, <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xiang-Gen Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.11804v1-abstract-short" style="display: inline;"> The problem of robustly reconstructing an integer vector from its erroneous remainders appears in many applications in the field of multidimensional (MD) signal processing. 
To address this problem, a robust MD Chinese remainder theorem (CRT) was recently proposed for a special class of moduli, where the remaining integer matrices left-divided by a greatest common left divisor (gcld) of all the mod&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.11804v1-abstract-full').style.display = 'inline'; document.getElementById('2311.11804v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.11804v1-abstract-full" style="display: none;"> The problem of robustly reconstructing an integer vector from its erroneous remainders appears in many applications in the field of multidimensional (MD) signal processing. To address this problem, a robust MD Chinese remainder theorem (CRT) was recently proposed for a special class of moduli, where the remaining integer matrices left-divided by a greatest common left divisor (gcld) of all the moduli are pairwise commutative and coprime. The strict constraint on the moduli limits the usefulness of the robust MD-CRT in practice. In this paper, we investigate the robust MD-CRT for a general set of moduli. We first introduce a necessary and sufficient condition on the difference between paired remainder errors, followed by a simple sufficient condition on the remainder error bound, for the robust MD-CRT for general moduli, where the conditions are associated with (the minimum distances of) these lattices generated by gcld&#39;s of paired moduli, and a closed-form reconstruction algorithm is presented. We then generalize the above results of the robust MD-CRT from integer vectors/matrices to real ones. Finally, we validate the robust MD-CRT for general moduli by employing numerical simulations, and apply it to MD sinusoidal frequency estimation based on multiple sub-Nyquist samplers. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.11804v1-abstract-full').style.display = 'none'; document.getElementById('2311.11804v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.10416">arXiv:2311.10416</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.10416">pdf</a>, <a href="https://arxiv.org/format/2311.10416">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Meta-DSP: A Meta-Learning Approach for Data-Driven Nonlinear Compensation in High-Speed Optical Fiber Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+X">Xinyu Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+Z">Zhennan Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Dong%2C+B">Bin Dong</a>, <a href="/search/eess?searchtype=author&amp;query=Ma%2C+D">Dingjiong Ma</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+L">Li Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+J">Jie Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.10416v1-abstract-short" style="display: inline;"> 
Non-linear effects in long-haul, high-speed optical fiber systems significantly hinder channel capacity. While the Digital Backward Propagation algorithm (DBP) with adaptive filter (ADF) can mitigate these effects, it suffers from an overwhelming computational complexity. Recent solutions have incorporated deep neural networks in a data-driven strategy to alleviate this complexity in the DBP model&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.10416v1-abstract-full').style.display = 'inline'; document.getElementById('2311.10416v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.10416v1-abstract-full" style="display: none;"> Non-linear effects in long-haul, high-speed optical fiber systems significantly hinder channel capacity. While the Digital Backward Propagation algorithm (DBP) with adaptive filter (ADF) can mitigate these effects, it suffers from an overwhelming computational complexity. Recent solutions have incorporated deep neural networks in a data-driven strategy to alleviate this complexity in the DBP model. However, these models are often limited to a specific symbol rate and channel number, necessitating retraining for different settings, and their performance declines significantly under high-speed and high-power conditions. We introduce Meta-DSP, a novel data-driven nonlinear compensation model based on meta-learning that processes multi-modal data across diverse transmission rates, power levels, and channel numbers. This not only enhances signal quality but also substantially reduces the complexity of the nonlinear processing algorithm. Our model delivers a 0.7 dB increase in the Q-factor over Electronic Dispersion Compensation (EDC), and compared to DBP, it curtails computational complexity by a factor of ten while retaining comparable performance. 
From the perspective of the entire signal processing system, the core idea of Meta-DSP can be employed in any segment of the overall communication system to enhance the model&#39;s scalability and generalization performance. Our research substantiates Meta-DSP&#39;s proficiency in addressing the critical parameters defining optical communication networks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.10416v1-abstract-full').style.display = 'none'; document.getElementById('2311.10416v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.05236">arXiv:2311.05236</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.05236">pdf</a>, <a href="https://arxiv.org/ps/2311.05236">ps</a>, <a href="https://arxiv.org/format/2311.05236">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Delay Doppler Transform </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xiang-Gen Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.05236v2-abstract-short" style="display: inline;"> This letter is to introduce delay Doppler transform (DDT) for a time domain signal. 
It is motivated by the recent studies in wireless communications over delay Doppler channels that have both time and Doppler spreads, such as satellite communication channels. We present some simple properties of DDT as well. The DDT study may provide insights into delay Doppler channels. </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.05236v2-abstract-full" style="display: none;"> This letter is to introduce delay Doppler transform (DDT) for a time domain signal. It is motivated by the recent studies in wireless communications over delay Doppler channels that have both time and Doppler spreads, such as satellite communication channels. We present some simple properties of DDT as well. The DDT study may provide insights into delay Doppler channels. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.05236v2-abstract-full').style.display = 'none'; document.getElementById('2311.05236v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.04992">arXiv:2310.04992</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.04992">pdf</a>, <a href="https://arxiv.org/format/2310.04992">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> VisionFM: a Multi-Modal Multi-Task Vision Foundation Model for Generalist Ophthalmic Artificial Intelligence </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Qiu%2C+J">Jianing Qiu</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+J">Jian Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Wei%2C+H">Hao Wei</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+P">Peilun Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+M">Minqing Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+Y">Yunyun Sun</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+L">Lin Li</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+H">Hanruo Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+H">Hongyi Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Hou%2C+S">Simeng Hou</a>, <a href="/search/eess?searchtype=author&amp;query=Zhao%2C+Y">Yuyang Zhao</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+X">Xuehui Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Xian%2C+J">Junfang Xian</a>, <a href="/search/eess?searchtype=author&amp;query=Qu%2C+X">Xiaoxia Qu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+S">Sirui Zhu</a>, <a 
href="/search/eess?searchtype=author&amp;query=Pan%2C+L">Lijie Pan</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+X">Xiaoniao Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+X">Xiaojia Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+S">Shuai Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+K">Kebing Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+C">Chenlong Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+M">Mingqiang Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Fan%2C+S">Sujie Fan</a>, <a href="/search/eess?searchtype=author&amp;query=Hu%2C+J">Jianhua Hu</a>, <a href="/search/eess?searchtype=author&amp;query=Lv%2C+A">Aiguo Lv</a> , et al. (17 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.04992v1-abstract-short" style="display: inline;"> We present VisionFM, a foundation model pre-trained with 3.4 million ophthalmic images from 560,457 individuals, covering a broad range of ophthalmic diseases, modalities, imaging devices, and demography. 
After pre-training, VisionFM provides a foundation to foster multiple ophthalmic artificial intelligence (AI) applications, such as disease screening and diagnosis, disease prognosis, subclassifi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.04992v1-abstract-full').style.display = 'inline'; document.getElementById('2310.04992v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.04992v1-abstract-full" style="display: none;"> We present VisionFM, a foundation model pre-trained with 3.4 million ophthalmic images from 560,457 individuals, covering a broad range of ophthalmic diseases, modalities, imaging devices, and demography. After pre-training, VisionFM provides a foundation to foster multiple ophthalmic artificial intelligence (AI) applications, such as disease screening and diagnosis, disease prognosis, subclassification of disease phenotype, and systemic biomarker and disease prediction, with each application enhanced with expert-level intelligence and accuracy. The generalist intelligence of VisionFM outperformed ophthalmologists with basic and intermediate levels in jointly diagnosing 12 common ophthalmic diseases. Evaluated on a new large-scale ophthalmic disease diagnosis benchmark database, as well as a new large-scale segmentation and detection benchmark database, VisionFM outperformed strong baseline deep neural networks. The ophthalmic image representations learned by VisionFM exhibited noteworthy explainability, and demonstrated strong generalizability to new ophthalmic modalities, disease spectrum, and imaging devices. As a foundation model, VisionFM has a large capacity to learn from diverse ophthalmic imaging data and disparate datasets. To be commensurate with this capacity, in addition to the real data used for pre-training, we also generated and leveraged synthetic ophthalmic imaging data. 
Experimental results revealed that synthetic data that passed visual Turing tests, can also enhance the representation learning capability of VisionFM, leading to substantial performance gains on downstream ophthalmic AI tasks. Beyond the ophthalmic AI applications developed, validated, and demonstrated in this work, substantial further applications can be achieved in an efficient and cost-effective manner using VisionFM as the foundation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.04992v1-abstract-full').style.display = 'none'; document.getElementById('2310.04992v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.04715">arXiv:2310.04715</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.04715">pdf</a>, <a href="https://arxiv.org/format/2310.04715">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> An Exploration of Task-decoupling on Two-stage Neural Post Filter for Real-time Personalized Acoustic Echo Cancellation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Z">Zihan Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+J">Jiayao Sun</a>, <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xianjun Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Z">Ziqian Wang</a>, <a 
href="/search/eess?searchtype=author&amp;query=Yan%2C+X">Xiaopeng Yan</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+Y">Yijian Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Xie%2C+L">Lei Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.04715v1-abstract-short" style="display: inline;"> Deep learning based techniques have been popularly adopted in acoustic echo cancellation (AEC). Utilization of speaker representation has extended the frontier of AEC, thus attracting many researchers&#39; interest in personalized acoustic echo cancellation (PAEC). Meanwhile, task-decoupling strategies are widely adopted in speech enhancement. To further explore the task-decoupling approach, we propos&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.04715v1-abstract-full').style.display = 'inline'; document.getElementById('2310.04715v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.04715v1-abstract-full" style="display: none;"> Deep learning based techniques have been popularly adopted in acoustic echo cancellation (AEC). Utilization of speaker representation has extended the frontier of AEC, thus attracting many researchers&#39; interest in personalized acoustic echo cancellation (PAEC). Meanwhile, task-decoupling strategies are widely adopted in speech enhancement. To further explore the task-decoupling approach, we propose to use a two-stage task-decoupling post-filter (TDPF) in PAEC. Furthermore, a multi-scale local-global speaker representation is applied to improve speaker extraction in PAEC. Experimental results indicate that the task-decoupling model can yield better performance than a single joint network. 
The optimal approach is to decouple the echo cancellation from noise and interference speech suppression. Based on the task-decoupling sequence, optimal training strategies for the two-stage model are explored afterwards. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.04715v1-abstract-full').style.display = 'none'; document.getElementById('2310.04715v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted to ASRU 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.12660">arXiv:2309.12660</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2309.12660">pdf</a>, <a href="https://arxiv.org/ps/2309.12660">ps</a>, <a href="https://arxiv.org/format/2309.12660">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Disturbance Rejection Control for Autonomous Trolley Collection Robots with Prescribed Performance </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xi%2C+R">Rui-Dong Xi</a>, <a href="/search/eess?searchtype=author&amp;query=Lu%2C+L">Liang Lu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+X">Xue Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+X">Xiao Xiao</a>, <a 
href="/search/eess?searchtype=author&amp;query=Xia%2C+B">Bingyi Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+J">Jiankun Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Meng%2C+M+Q+-">Max Q. -H. Meng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.12660v1-abstract-short" style="display: inline;"> Trajectory tracking control of autonomous trolley collection robots (ATCR) is an ambitious work due to the complex environment, serious noise and external disturbances. This work investigates a control scheme for ATCR subjecting to severe environmental interference. A kinematics model based adaptive sliding mode disturbance observer with fast convergence is first proposed to estimate the lumped di&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.12660v1-abstract-full').style.display = 'inline'; document.getElementById('2309.12660v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.12660v1-abstract-full" style="display: none;"> Trajectory tracking control of autonomous trolley collection robots (ATCR) is an ambitious work due to the complex environment, serious noise and external disturbances. This work investigates a control scheme for ATCR subjecting to severe environmental interference. A kinematics model based adaptive sliding mode disturbance observer with fast convergence is first proposed to estimate the lumped disturbances. On this basis, a robust controller with prescribed performance is proposed using a backstepping technique, which improves the transient performance and guarantees fast convergence. Simulation outcomes have been provided to illustrate the effectiveness of the proposed control scheme. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.12660v1-abstract-full').style.display = 'none'; document.getElementById('2309.12660v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.12521">arXiv:2309.12521</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2309.12521">pdf</a>, <a href="https://arxiv.org/format/2309.12521">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Profile-Error-Tolerant Target-Speaker Voice Activity Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wang%2C+D">Dongmei Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+X">Xiong Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Kanda%2C+N">Naoyuki Kanda</a>, <a href="/search/eess?searchtype=author&amp;query=Yousefi%2C+M">Midia Yousefi</a>, <a href="/search/eess?searchtype=author&amp;query=Yoshioka%2C+T">Takuya Yoshioka</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+J">Jian Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.12521v2-abstract-short" style="display: inline;"> Target-Speaker Voice Activity Detection (TS-VAD) utilizes a set of speaker profiles alongside an input 
audio signal to perform speaker diarization. While its superiority over conventional methods has been demonstrated, the method can suffer from errors in speaker profiles, as those profiles are typically obtained by running a traditional clustering-based diarization method over the input signal. T&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.12521v2-abstract-full').style.display = 'inline'; document.getElementById('2309.12521v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.12521v2-abstract-full" style="display: none;"> Target-Speaker Voice Activity Detection (TS-VAD) utilizes a set of speaker profiles alongside an input audio signal to perform speaker diarization. While its superiority over conventional methods has been demonstrated, the method can suffer from errors in speaker profiles, as those profiles are typically obtained by running a traditional clustering-based diarization method over the input signal. This paper proposes an extension to TS-VAD, called Profile-Error-Tolerant TS-VAD (PET-TSVAD), which is robust to such speaker profile errors. This is achieved by employing transformer-based TS-VAD that can handle a variable number of speakers and further introducing a set of additional pseudo-speaker profiles to handle speakers undetected during the first pass diarization. During training, we use speaker profiles estimated by multiple different clustering algorithms to reduce the mismatch between the training and testing conditions regarding speaker profiles. Experimental results show that PET-TSVAD consistently outperforms the existing TS-VAD method on both the VoxConverse and DIHARD-I datasets. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.12521v2-abstract-full').style.display = 'none'; document.getElementById('2309.12521v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submission for ICASSP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.16021">arXiv:2308.16021</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2308.16021">pdf</a>, <a href="https://arxiv.org/format/2308.16021">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> CALM: Contrastive Cross-modal Speaking Style Modeling for Expressive Text-to-Speech Synthesis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Meng%2C+Y">Yi Meng</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+X">Xiang Li</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+Z">Zhiyong Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+T">Tingtian Li</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+Z">Zixun Sun</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+X">Xinyu Xiao</a>, <a 
href="/search/eess?searchtype=author&amp;query=Sun%2C+C">Chi Sun</a>, <a href="/search/eess?searchtype=author&amp;query=Zhan%2C+H">Hui Zhan</a>, <a href="/search/eess?searchtype=author&amp;query=Meng%2C+H">Helen Meng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.16021v1-abstract-short" style="display: inline;"> To further improve the speaking styles of synthesized speeches, current text-to-speech (TTS) synthesis systems commonly employ reference speeches to stylize their outputs instead of just the input texts. These reference speeches are obtained by manual selection which is resource-consuming, or selected by semantic features. However, semantic features contain not only style-related information, but&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.16021v1-abstract-full').style.display = 'inline'; document.getElementById('2308.16021v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.16021v1-abstract-full" style="display: none;"> To further improve the speaking styles of synthesized speeches, current text-to-speech (TTS) synthesis systems commonly employ reference speeches to stylize their outputs instead of just the input texts. These reference speeches are obtained by manual selection which is resource-consuming, or selected by semantic features. However, semantic features contain not only style-related information, but also style irrelevant information. The information irrelevant to speaking style in the text could interfere the reference audio selection and result in improper speaking styles. To improve the reference selection, we propose Contrastive Acoustic-Linguistic Module (CALM) to extract the Style-related Text Feature (STF) from the text. 
CALM optimizes the correlation between the speaking style embedding and the extracted STF with contrastive learning. Thus, a certain number of the most appropriate reference speeches for the input text are selected by retrieving the speeches with the top STF similarities. Then the style embeddings are weighted summarized according to their STF similarities and used to stylize the synthesized speech of TTS. Experiment results demonstrate the effectiveness of our proposed approach, with both objective evaluations and subjective evaluations on the speaking styles of the synthesized speeches outperform a baseline approach with semantic-feature-based reference selection. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.16021v1-abstract-full').style.display = 'none'; document.getElementById('2308.16021v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by InterSpeech 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.13148">arXiv:2308.13148</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2308.13148">pdf</a>, <a href="https://arxiv.org/ps/2308.13148">ps</a>, <a href="https://arxiv.org/format/2308.13148">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Understanding Turbo Codes: A Signal Processing Study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xiang-Gen Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.13148v1-abstract-short" style="display: inline;"> In this paper, we study turbo codes from the digital signal processing point of view by defining turbo codes over the complex field. It is known that iterative decoding and interleaving between concatenated parallel codes are two key elements that make turbo codes perform significantly better than the conventional error control codes. 
This is analytically illustrated in this paper by showing that&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.13148v1-abstract-full').style.display = 'inline'; document.getElementById('2308.13148v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.13148v1-abstract-full" style="display: none;"> In this paper, we study turbo codes from the digital signal processing point of view by defining turbo codes over the complex field. It is known that iterative decoding and interleaving between concatenated parallel codes are two key elements that make turbo codes perform significantly better than the conventional error control codes. This is analytically illustrated in this paper by showing that the decoded noise mean power in the iterative decoding decreases when the number of iterations increases, as long as the interleaving decorrelates the noise after each iterative decoding step. An analytic decreasing rate and the limit of the decoded noise mean power are given. The limit of the decoded noise mean power of the iterative decoding of a turbo code with two parallel codes with their rates less than 1/2 is one third of the noise power before the decoding, which can not be achieved by any non-turbo codes with the same rate. From this study, the role of designing a good interleaver can also be clearly seen. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.13148v1-abstract-full').style.display = 'none'; document.getElementById('2308.13148v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.09512">arXiv:2308.09512</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2308.09512">pdf</a>, <a href="https://arxiv.org/format/2308.09512">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Multiuser Communications with Movable-Antenna Base Station: Joint Antenna Positioning, Receive Combining, and Power Control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+Z">Zhenyu Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Pi%2C+X">Xiangyu Pi</a>, <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+L">Lipeng Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xiang-Gen Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+R">Rui Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.09512v1-abstract-short" style="display: inline;"> Movable antenna (MA) is an emerging technology which enables a local movement of the antenna in the transmitter/receiver region for improving the channel condition and communication performance. In this paper, we study the deployment of multiple MAs at the base station (BS) for enhancing the multiuser communication performance. 
First, we model the multiuser channel in the uplink to characterize th&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.09512v1-abstract-full').style.display = 'inline'; document.getElementById('2308.09512v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.09512v1-abstract-full" style="display: none;"> Movable antenna (MA) is an emerging technology which enables a local movement of the antenna in the transmitter/receiver region for improving the channel condition and communication performance. In this paper, we study the deployment of multiple MAs at the base station (BS) for enhancing the multiuser communication performance. First, we model the multiuser channel in the uplink to characterize the wireless channel variation due to MAs&#39; movements at the BS. Then, an optimization problem is formulated to maximize the minimum achievable rate among multiple users for MA-aided uplink multiuser communications by jointly optimizing the MAs&#39; positions, their receive combining at the BS, and the transmit power of users, under the constraints of finite moving region for MAs, minimum inter-MA distance, and maximum transmit power of each user. To solve this challenging non-convex optimization problem, a two-loop iterative algorithm is proposed by leveraging the particle swarm optimization (PSO) method. Specifically, the outer-loop updates the positions of a set of particles, where each particle&#39;s position represents one realization of the antenna position vector (APV) of all MAs. The inner-loop implements the fitness evaluation for each particle in terms of the max-min achievable rate of multiple users with its corresponding APV, where the receive combining matrix of the BS and the transmit power of each user are optimized by applying the block coordinate descent (BCD) technique. 
Simulation results show that the antenna position optimization for MAs-aided BSs can significantly improve the rate performance as compared to conventional BSs with fixed-position antennas (FPAs). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.09512v1-abstract-full').style.display = 'none'; document.getElementById('2308.09512v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: substantial text overlap with arXiv:2308.05546</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.10316">arXiv:2307.10316</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2307.10316">pdf</a>, <a href="https://arxiv.org/format/2307.10316">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> CPCM: Contextual Point Cloud Modeling for Weakly-supervised Point Cloud Semantic Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Liu%2C+L">Lizhao Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhuang%2C+Z">Zhuangwei Zhuang</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+S">Shangxin Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+X">Xunlong Xiao</a>, <a 
href="/search/eess?searchtype=author&amp;query=Xiang%2C+T">Tianhang Xiang</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+C">Cen Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+J">Jingdong Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Tan%2C+M">Mingkui Tan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.10316v1-abstract-short" style="display: inline;"> We study the task of weakly-supervised point cloud semantic segmentation with sparse annotations (e.g., less than 0.1% points are labeled), aiming to reduce the expensive cost of dense annotations. Unfortunately, with extremely sparse annotated points, it is very difficult to extract both contextual and object information for scene understanding such as semantic segmentation. Motivated by masked m&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.10316v1-abstract-full').style.display = 'inline'; document.getElementById('2307.10316v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.10316v1-abstract-full" style="display: none;"> We study the task of weakly-supervised point cloud semantic segmentation with sparse annotations (e.g., less than 0.1% points are labeled), aiming to reduce the expensive cost of dense annotations. Unfortunately, with extremely sparse annotated points, it is very difficult to extract both contextual and object information for scene understanding such as semantic segmentation. Motivated by masked modeling (e.g., MAE) in image and video representation learning, we seek to endow the power of masked modeling to learn contextual information from sparsely-annotated points. However, directly applying MAE to 3D point clouds with sparse annotations may fail to work. 
First, it is nontrivial to effectively mask out the informative visual context from 3D point clouds. Second, how to fully exploit the sparse annotations for context modeling remains an open question. In this paper, we propose a simple yet effective Contextual Point Cloud Modeling (CPCM) method that consists of two parts: a region-wise masking (RegionMask) strategy and a contextual masked training (CMT) method. Specifically, RegionMask masks the point cloud continuously in geometric space to construct a meaningful masked prediction task for subsequent context learning. CMT disentangles the learning of supervised segmentation and unsupervised masked context prediction for effectively learning the very limited labeled points and mass unlabeled points, respectively. Extensive experiments on the widely-tested ScanNet V2 and S3DIS benchmarks demonstrate the superiority of CPCM over the state-of-the-art. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.10316v1-abstract-full').style.display = 'none'; document.getElementById('2307.10316v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ICCV 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.05386">arXiv:2307.05386</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2307.05386">pdf</a>, <a href="https://arxiv.org/format/2307.05386">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optics">physics.optics</span> </div> </div> <p class="title is-5 mathjax"> Exploring the Potential of Integrated Optical Sensing and Communication (IOSAC) Systems with Si Waveguides for Future Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Ou%2C+X">Xiangpeng Ou</a>, <a href="/search/eess?searchtype=author&amp;query=Qiu%2C+Y">Ying Qiu</a>, <a href="/search/eess?searchtype=author&amp;query=Luo%2C+M">Ming Luo</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+F">Fujun Sun</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+P">Peng Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Gang Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+J">Junjie Li</a>, <a href="/search/eess?searchtype=author&amp;query=Gao%2C+J">Jianfeng Gao</a>, <a href="/search/eess?searchtype=author&amp;query=He%2C+X">Xiaobin He</a>, <a href="/search/eess?searchtype=author&amp;query=Du%2C+A">Anyan Du</a>, <a href="/search/eess?searchtype=author&amp;query=Tang%2C+B">Bo Tang</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+B">Bin Li</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+Z">Zichen Liu</a>, <a 
href="/search/eess?searchtype=author&amp;query=Li%2C+Z">Zhihua Li</a>, <a href="/search/eess?searchtype=author&amp;query=Xie%2C+L">Ling Xie</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+X">Xi Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Luo%2C+J">Jun Luo</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+W">Wenwu Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Tao%2C+J">Jin Tao</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+Y">Yan Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.05386v1-abstract-short" style="display: inline;"> Advanced silicon photonic technologies enable integrated optical sensing and communication (IOSAC) in real time for the emerging application requirements of simultaneous sensing and communication for next-generation networks. Here, we propose and demonstrate the IOSAC system on the silicon nitride (SiN) photonics platform. The IOSAC devices based on microring resonators are capable of monitoring t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.05386v1-abstract-full').style.display = 'inline'; document.getElementById('2307.05386v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.05386v1-abstract-full" style="display: none;"> Advanced silicon photonic technologies enable integrated optical sensing and communication (IOSAC) in real time for the emerging application requirements of simultaneous sensing and communication for next-generation networks. Here, we propose and demonstrate the IOSAC system on the silicon nitride (SiN) photonics platform. 
The IOSAC devices based on microring resonators are capable of monitoring the variation of analytes, transmitting the information to the terminal along with the modulated optical signal in real-time, and replacing bulk optics in high-precision and high-speed applications. By directly integrating SiN ring resonators with optical communication networks, simultaneous sensing and optical communication are demonstrated by an optical signal transmission experimental system using especially filtering amplified spontaneous emission spectra. The refractive index (RI) sensing ring with a sensitivity of 172 nm/RIU, a figure of merit (FOM) of 1220, and a detection limit (DL) of 8.2×10<sup>-6</sup> RIU is demonstrated. Simultaneously, the 1.25 Gbps optical on-off-keying (OOK) signal is transmitted at the concentration of different NaCl solutions, which indicates the bit-error-ratio (BER) decreases with the increase in concentration. The novel IOSAC technology shows the potential to realize high-performance simultaneous biosensing and communication in real time and further accelerate the development of IoT and 6G networks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.05386v1-abstract-full').style.display = 'none'; document.getElementById('2307.05386v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.05365">arXiv:2307.05365</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2307.05365">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Decoding Taste Information in Human Brain: A Temporal and Spatial Reconstruction Data Augmentation Method Coupled with Taste EEG </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xiuxin Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+Y">Yuchao Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Y">Yan Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Zheng%2C+W">Wenbo Zheng</a>, <a href="/search/eess?searchtype=author&amp;query=Men%2C+H">Hong Men</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.05365v1-abstract-short" style="display: inline;"> For humans, taste is essential for perceiving food&#39;s nutrient content or harmful components. The current sensory evaluation of taste mainly relies on artificial sensory evaluation and electronic tongue, but the former has strong subjectivity and poor repeatability, and the latter is not flexible enough. 
This work proposed a strategy for acquiring and recognizing taste electroencephalogram (EEG), a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.05365v1-abstract-full').style.display = 'inline'; document.getElementById('2307.05365v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.05365v1-abstract-full" style="display: none;"> For humans, taste is essential for perceiving food&#39;s nutrient content or harmful components. The current sensory evaluation of taste mainly relies on artificial sensory evaluation and electronic tongue, but the former has strong subjectivity and poor repeatability, and the latter is not flexible enough. This work proposed a strategy for acquiring and recognizing taste electroencephalogram (EEG), aiming to decode people&#39;s objective perception of taste through taste EEG. Firstly, according to the proposed experimental paradigm, the taste EEG of subjects under different taste stimulation was collected. Secondly, to avoid insufficient training of the model due to the small number of taste EEG samples, a Temporal and Spatial Reconstruction Data Augmentation (TSRDA) method was proposed, which effectively augmented the taste EEG by reconstructing the taste EEG&#39;s important features in temporal and spatial dimensions. Thirdly, a multi-view channel attention module was introduced into a designed convolutional neural network to extract the important features of the augmented taste EEG. The proposed method has accuracy of 99.56%, F1-score of 99.48%, and kappa of 99.38%, proving the method&#39;s ability to distinguish the taste EEG evoked by different taste stimuli successfully. In summary, combining TSRDA with taste EEG technology provides an objective and effective method for sensory evaluation of food taste. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.05365v1-abstract-full').style.display = 'none'; document.getElementById('2307.05365v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 11 figures, 30 references, article is being submitted</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.03387">arXiv:2307.03387</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2307.03387">pdf</a>, <a href="https://arxiv.org/ps/2307.03387">ps</a>, <a href="https://arxiv.org/format/2307.03387">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> A Joint Design for Full-duplex OFDM AF Relay System with Precoded Short Guard Interval </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Yang%2C+P">Pu Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xiang-Gen Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Qu%2C+Q">Qingyue Qu</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+H">Han Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+Y">Yi Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short 
has-text-grey-dark mathjax" id="2307.03387v1-abstract-short" style="display: inline;"> In-band full-duplex relay (FDR) has attracted much attention as an effective solution to improve the coverage and spectral efficiency in wireless communication networks. The basic problem for FDR transmission is how to eliminate the inherent self-interference and re-use the residual self-interference (RSI) at the relay to improve the end-to-end performance. Considering the RSI at the FDR, the over&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.03387v1-abstract-full').style.display = 'inline'; document.getElementById('2307.03387v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.03387v1-abstract-full" style="display: none;"> In-band full-duplex relay (FDR) has attracted much attention as an effective solution to improve the coverage and spectral efficiency in wireless communication networks. The basic problem for FDR transmission is how to eliminate the inherent self-interference and re-use the residual self-interference (RSI) at the relay to improve the end-to-end performance. Considering the RSI at the FDR, the overall equivalent channel can be modeled as an infinite impulse response (IIR) channel. For this IIR channel, a joint design for precoding, power gain control and equalization of cooperative OFDM relay systems is presented. Compared with the traditional OFDM systems, the length of the guard interval for the proposed design can be distinctly reduced, thereby improving the spectral efficiency. By analyzing the noise sources, this paper evaluates the signal to noise ratio (SNR) of the proposed scheme and presents a power gain control algorithm at the FDR. Compared with the existing schemes, the proposed scheme shows a superior bit error rate (BER) performance. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.03387v1-abstract-full').style.display = 'none'; document.getElementById('2307.03387v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 5 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 94-10 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> H.1.1 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.01798">arXiv:2307.01798</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2307.01798">pdf</a>, <a href="https://arxiv.org/ps/2307.01798">ps</a>, <a href="https://arxiv.org/format/2307.01798">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Edge-aware Multi-task Network for Integrating Quantification Segmentation and Uncertainty Prediction of Liver Tumor on Multi-modality Non-contrast MRI </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+X">Xiaojiao Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Hu%2C+Q">Qinmin Hu</a>, <a 
href="/search/eess?searchtype=author&amp;query=Wang%2C+G">Guanghui Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.01798v1-abstract-short" style="display: inline;"> Simultaneous multi-index quantification, segmentation, and uncertainty estimation of liver tumors on multi-modality non-contrast magnetic resonance imaging (NCMRI) are crucial for accurate diagnosis. However, existing methods lack an effective mechanism for multi-modality NCMRI fusion and accurate boundary information capture, making these tasks challenging. To address these issues, this paper pro&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.01798v1-abstract-full').style.display = 'inline'; document.getElementById('2307.01798v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.01798v1-abstract-full" style="display: none;"> Simultaneous multi-index quantification, segmentation, and uncertainty estimation of liver tumors on multi-modality non-contrast magnetic resonance imaging (NCMRI) are crucial for accurate diagnosis. However, existing methods lack an effective mechanism for multi-modality NCMRI fusion and accurate boundary information capture, making these tasks challenging. To address these issues, this paper proposes a unified framework, namely edge-aware multi-task network (EaMtNet), to associate multi-index quantification, segmentation, and uncertainty of liver tumors on the multi-modality NCMRI. The EaMtNet employs two parallel CNN encoders and the Sobel filters to extract local features and edge maps, respectively. The newly designed edge-aware feature aggregation module (EaFA) is used for feature fusion and selection, making the network edge-aware by capturing long-range dependency between feature and edge maps. 
Multi-tasking leverages prediction discrepancy to estimate uncertainty and improve segmentation and quantification performance. Extensive experiments are performed on multi-modality NCMRI with 250 clinical subjects. The proposed model outperforms the state-of-the-art by a large margin, achieving a dice similarity coefficient of 90.01$\pm$1.23 and a mean absolute error of 2.72$\pm$0.58 mm for MD. The results demonstrate the potential of EaMtNet as a reliable clinical-aided tool for medical image analysis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.01798v1-abstract-full').style.display = 'none'; document.getElementById('2307.01798v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.10805">arXiv:2306.10805</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.10805">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Medical Physics">physics.med-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.47852/bonviewMEDIN42024108">10.47852/bonviewMEDIN42024108 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> 
Experts&#39; cognition-driven ensemble deep learning for external validation of predicting pathological complete response to neoadjuvant chemotherapy from histological images in breast cancer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Yang%2C+Y">Yongquan Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+F">Fengling Li</a>, <a href="/search/eess?searchtype=author&amp;query=Wei%2C+Y">Yani Wei</a>, <a href="/search/eess?searchtype=author&amp;query=Zhao%2C+Y">Yuanyuan Zhao</a>, <a href="/search/eess?searchtype=author&amp;query=Fu%2C+J">Jing Fu</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+X">Xiuli Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Bu%2C+H">Hong Bu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.10805v2-abstract-short" style="display: inline;"> In breast cancer, neoadjuvant chemotherapy (NAC) provides a standard treatment option for patients who have locally advanced cancer and some large operable tumors. A patient will have better prognosis when he has achieved a pathological complete response (pCR) with the treatment of NAC. There has been a trend to directly predict pCR to NAC from histological images based on deep learning (DL). Howe&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.10805v2-abstract-full').style.display = 'inline'; document.getElementById('2306.10805v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.10805v2-abstract-full" style="display: none;"> In breast cancer, neoadjuvant chemotherapy (NAC) provides a standard treatment option for patients who have locally advanced cancer and some large operable tumors. 
A patient will have better prognosis when he has achieved a pathological complete response (pCR) with the treatment of NAC. There has been a trend to directly predict pCR to NAC from histological images based on deep learning (DL). However, the DL-based predictive models numerically have better performances in internal validation than in external validation. In this paper, we aim to alleviate this situation with an intrinsic approach. We propose an experts&#39; cognition-driven ensemble deep learning (ECDEDL) approach. Taking the cognition of both pathology and artificial intelligence experts into consideration to improve the generalization of the predictive model to the external validation, ECDEDL can intrinsically approximate the working paradigm of a human being which will refer to his various working experiences to make decisions. ECDEDL was validated with 695 WSIs collected from the same center as the primary dataset to develop the predictive model and perform the internal validation, and was also validated with 340 WSIs collected from other three centers as the external dataset to perform the external validation. In external validation, ECDEDL improves the AUCs of pCR prediction from 61.52(59.80-63.26) to 67.75(66.74-68.80) and the Accuracies of pCR prediction from 56.09(49.39-62.79) to 71.01(69.44-72.58). ECDEDL was quite effective for external validation of predicting pCR to NAC from histological images in breast cancer, numerically approximating the internal validation. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.10805v2-abstract-full').style.display = 'none'; document.getElementById('2306.10805v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This is the final published version</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.00812">arXiv:2306.00812</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.00812">pdf</a>, <a href="https://arxiv.org/format/2306.00812">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Harmonic enhancement using learnable comb filter for light-weight full-band speech enhancement model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Le%2C+X">Xiaohuai Le</a>, <a href="/search/eess?searchtype=author&amp;query=Lei%2C+T">Tong Lei</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+L">Li Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+Y">Yiqing Guo</a>, <a href="/search/eess?searchtype=author&amp;query=He%2C+C">Chao He</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+C">Cheng Chen</a>, <a 
href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xianjun Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Gao%2C+H">Hua Gao</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+Y">Yijian Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+P">Piao Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Song%2C+S">Shenyi Song</a>, <a href="/search/eess?searchtype=author&amp;query=Lu%2C+J">Jing Lu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.00812v1-abstract-short" style="display: inline;"> With fewer feature dimensions, filter banks are often used in light-weight full-band speech enhancement models. In order to further enhance the coarse speech in the sub-band domain, it is necessary to apply a post-filtering for harmonic retrieval. The signal processing-based comb filters used in RNNoise and PercepNet have limited performance and may cause speech quality degradation due to inaccura&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.00812v1-abstract-full').style.display = 'inline'; document.getElementById('2306.00812v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.00812v1-abstract-full" style="display: none;"> With fewer feature dimensions, filter banks are often used in light-weight full-band speech enhancement models. In order to further enhance the coarse speech in the sub-band domain, it is necessary to apply a post-filtering for harmonic retrieval. The signal processing-based comb filters used in RNNoise and PercepNet have limited performance and may cause speech quality degradation due to inaccurate fundamental frequency estimation. To tackle this problem, we propose a learnable comb filter to enhance harmonics. 
Based on the sub-band model, we design a DNN-based fundamental frequency estimator to estimate the discrete fundamental frequencies and a comb filter for harmonic enhancement, which are trained via an end-to-end pattern. The experiments show the advantages of our proposed method over PecepNet and DeepFilterNet. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.00812v1-abstract-full').style.display = 'none'; document.getElementById('2306.00812v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted by Interspeech 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.13753">arXiv:2305.13753</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.13753">pdf</a>, <a href="https://arxiv.org/format/2305.13753">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> A Graph-Based Collision Resolution Scheme for Asynchronous Unsourced Random Access </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Li%2C+T">Tianya Li</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+Y">Yongpeng Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+W">Wenjun Zhang</a>, <a 
href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xiang-Gen Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+C">Chengshan Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.13753v4-abstract-short" style="display: inline;"> This paper investigates the multiple-input-multiple-output (MIMO) massive unsourced random access in an asynchronous orthogonal frequency division multiplexing (OFDM) system, with both timing and frequency offsets (TFO) and non-negligible user collisions. The proposed coding framework splits the data into two parts encoded by sparse regression code (SPARC) and low-density parity check (LDPC) code.&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.13753v4-abstract-full').style.display = 'inline'; document.getElementById('2305.13753v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.13753v4-abstract-full" style="display: none;"> This paper investigates the multiple-input-multiple-output (MIMO) massive unsourced random access in an asynchronous orthogonal frequency division multiplexing (OFDM) system, with both timing and frequency offsets (TFO) and non-negligible user collisions. The proposed coding framework splits the data into two parts encoded by sparse regression code (SPARC) and low-density parity check (LDPC) code. Multistage orthogonal pilots are transmitted in the first part to reduce collision density. Unlike existing schemes requiring a quantization codebook with a large size for estimating TFO, we establish a \textit{graph-based channel reconstruction and collision resolution (GB-CR$^2$)} algorithm to iteratively reconstruct channels, resolve collisions, and compensate for TFO rotations on the formulated graph jointly among multiple stages. 
We further propose to leverage the geometric characteristics of signal constellations to correct TFO estimations. Exhaustive simulations demonstrate remarkable performance superiority in channel estimation and data recovery with substantial complexity reduction compared to state-of-the-art schemes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.13753v4-abstract-full').style.display = 'none'; document.getElementById('2305.13753v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 6 figures, accepted for the presentation at IEEE GLOBECOM 2023</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Xia%2C+X&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Xia%2C+X&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Xia%2C+X&amp;start=50" class="pagination-link " aria-label="Page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Xia%2C+X&amp;start=100" class="pagination-link " aria-label="Page 3">3 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a 
href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a 
href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 
47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10