Search | arXiv e-print repository
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 64 results for author: <span class="mathjax">Qi, J</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/eess" aria-role="search"> Searching in archive <strong>eess</strong>. <a href="/search/?searchtype=author&query=Qi%2C+J">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Qi, J"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Qi%2C+J&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Qi, J"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 

1. arXiv:2501.18201 [pdf, other]  (cs.AI, eess.SY)
   Neural Operator based Reinforcement Learning for Control of first-order PDEs with Spatially-Varying State Delay
   Authors: Jiaqi Hu, Jie Qi, Jing Zhang
   Abstract: Control of distributed parameter systems affected by delays is a challenging task, particularly when the delays depend on spatial variables. The idea of integrating analytical control theory with learning-based control within a unified control scheme is becoming increasingly promising and advantageous. In this paper, we address the problem of controlling an unstable first-order hyperbolic PDE with spatially-varying delays by combining PDE backstepping control strategies and deep reinforcement learning (RL). To eliminate the assumption on the delay function required for the backstepping design, we propose a soft actor-critic (SAC) architecture incorporating a DeepONet to approximate the backstepping controller. The DeepONet extracts features from the backstepping controller and feeds them into the policy network. In simulations, our algorithm outperforms the baseline SAC without prior backstepping knowledge and the analytical controller.
   Submitted 30 January, 2025; originally announced January 2025.
   Comments: 6 pages, 7 figures
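
   As a rough, hypothetical illustration of the kind of architecture this abstract describes (a DeepONet-style branch/trunk network whose output features are fed to an SAC-like policy head), the sketch below uses made-up layer sizes and module names and is not the authors' implementation:

```python
# Illustrative sketch only: a generic DeepONet-style feature extractor feeding a
# policy head, loosely in the spirit of arXiv:2501.18201. All sizes, names, and
# the way features are fused with the state are assumptions, not the paper's code.
import torch
import torch.nn as nn

class DeepONetFeatures(nn.Module):
    def __init__(self, n_sensors=64, n_queries=16, p=32):
        super().__init__()
        # branch net: encodes the sampled input function (e.g. the PDE state profile)
        self.branch = nn.Sequential(nn.Linear(n_sensors, 128), nn.Tanh(), nn.Linear(128, p))
        # trunk net: encodes the query locations where the operator output is evaluated
        self.trunk = nn.Sequential(nn.Linear(1, 128), nn.Tanh(), nn.Linear(128, p))
        self.queries = torch.linspace(0.0, 1.0, n_queries).unsqueeze(-1)  # fixed spatial grid

    def forward(self, u_sampled):                  # u_sampled: (batch, n_sensors)
        b = self.branch(u_sampled)                 # (batch, p)
        t = self.trunk(self.queries)               # (n_queries, p)
        return b @ t.T                             # (batch, n_queries) operator-output features

class PolicyNet(nn.Module):
    def __init__(self, state_dim=64, n_queries=16, action_dim=1):
        super().__init__()
        self.features = DeepONetFeatures(n_sensors=state_dim, n_queries=n_queries)
        self.head = nn.Sequential(
            nn.Linear(state_dim + n_queries, 256), nn.ReLU(),
            nn.Linear(256, 2 * action_dim),        # mean and log-std, as in SAC
        )

    def forward(self, state):
        feats = self.features(state)               # backstepping-controller-like features
        mean, log_std = self.head(torch.cat([state, feats], dim=-1)).chunk(2, dim=-1)
        return mean, log_std

policy = PolicyNet()
mean, log_std = policy(torch.randn(8, 64))          # 8 sampled PDE states on a 64-point grid
```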

2. arXiv:2501.09759 [pdf]  (eess.SP, physics.app-ph)
   A wideband amplifying and filtering reconfigurable intelligent surface for wireless relay
   Authors: Lijie Wu, Qun Yan Zhou, Jun Yan Dai, Siran Wang, Junwei Zhang, Zhen Jie Qi, Hanqing Yang, Ruizhe Jiang, Zheng Xing Wang, Huidong Li, Zhen Zhang, Jiang Luo, Qiang Cheng, Tie Jun Cui
   Abstract: Programmable metasurfaces have garnered significant attention due to their exceptional ability to manipulate electromagnetic (EM) waves in real time, leading to the emergence of a prominent area in wireless communication, namely reconfigurable intelligent surfaces (RISs), to control the signal propagation and coverage. However, the existing RISs usually suffer from limited operating distance and band interference, which hinder their practical applications in wireless relay and communication systems. To overcome the limitations, we propose an amplifying and filtering RIS (AF-RIS) to enhance the in-band signal energy and filter the out-of-band signal of the incident EM waves, ensuring the miniaturization of the RIS array and enabling its anti-interference ability. In addition, each AF-RIS element is equipped with a 2-bit phase control capability, further endowing the entire array with great beamforming performance. An elaborately designed 4×8 AF-RIS array is presented by integrating the power dividing and combining networks, which substantially reduces the number of amplifiers and filters, thereby reducing the hardware costs and power consumption. Experimental results showcase the powerful capabilities of AF-RIS in beam-steering, frequency selectivity, and signal amplification. Therefore, the proposed AF-RIS holds significant promise for critical applications in wireless relay systems by offering an efficient solution to improve frequency selectivity, enhance signal coverage, and reduce hardware size.
   Submitted 31 December, 2024; originally announced January 2025.
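
   For readers unfamiliar with 2-bit phase control, the following generic sketch (standard uniform-linear-array math with an assumed element spacing and steering angle, unrelated to the actual AF-RIS hardware) shows how a four-level phase quantization still steers a beam, at a small gain penalty:

```python
# Generic illustration of 2-bit phase quantization for beam steering with a uniform
# linear array; not the AF-RIS design of arXiv:2501.09759. Spacing, element count,
# and the target angle are arbitrary assumptions.
import numpy as np

n_elems, spacing = 32, 0.5            # number of elements, spacing in wavelengths
target_deg = 25.0                     # desired steering angle

k_d = 2 * np.pi * spacing * np.arange(n_elems)
ideal_phase = -k_d * np.sin(np.deg2rad(target_deg))           # continuous phase profile
levels = np.array([0, np.pi / 2, np.pi, 3 * np.pi / 2])        # the four 2-bit phase states
diffs = np.angle(np.exp(1j * (ideal_phase[:, None] - levels[None, :])))  # wrapped differences
quantized = levels[np.argmin(np.abs(diffs), axis=1)]           # nearest 2-bit level per element

angles = np.deg2rad(np.linspace(-90, 90, 721))
steer = np.exp(1j * k_d[:, None] * np.sin(angles)[None, :])    # steering vectors
af_ideal = np.abs(np.exp(1j * ideal_phase) @ steer)
af_2bit = np.abs(np.exp(1j * quantized) @ steer)

peak = np.rad2deg(angles[np.argmax(af_2bit)])
print(f"2-bit beam peaks at {peak:.1f} deg (target {target_deg} deg), "
      f"gain loss {20 * np.log10(af_ideal.max() / af_2bit.max()):.2f} dB")
```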

3. arXiv:2501.05961 [pdf, other]  (cs.CV, eess.IV)
   Swin-X2S: Reconstructing 3D Shape from 2D Biplanar X-ray with Swin Transformers
   Authors: Kuan Liu, Zongyuan Ying, Jie Jin, Dongyan Li, Ping Huang, Wenjian Wu, Zhe Chen, Jin Qi, Yong Lu, Lianfu Deng, Bo Chen
   Abstract: The conversion from 2D X-ray to 3D shape holds significant potential for improving diagnostic efficiency and safety. However, existing reconstruction methods often rely on hand-crafted features, manual intervention, and prior knowledge, resulting in unstable shape errors and additional processing costs. In this paper, we introduce Swin-X2S, an end-to-end deep learning method for directly reconstructing 3D segmentation and labeling from 2D biplanar orthogonal X-ray images. Swin-X2S employs an encoder-decoder architecture: the encoder leverages a 2D Swin Transformer for X-ray information extraction, while the decoder employs 3D convolution with cross-attention to integrate structural features from orthogonal views. A dimension-expanding module is introduced to bridge the encoder and decoder, ensuring a smooth conversion from 2D pixels to 3D voxels. We evaluate the proposed method through extensive qualitative and quantitative experiments across nine publicly available datasets covering four anatomies (femur, hip, spine, and rib), with a total of 54 categories. Significant improvements over previous methods have been observed not only in the segmentation and labeling metrics but also in the clinically relevant parameters that are of primary concern in practical applications, which demonstrates the promise of Swin-X2S to provide an effective option for anatomical shape reconstruction in clinical scenarios. Code is available at https://github.com/liukuan5625/Swin-X2S.
   Submitted 10 January, 2025; originally announced January 2025.

4. arXiv:2412.08219 [pdf, other]  (eess.SY)
   Neural Operator Feedback for a First-Order PIDE with Spatially-Varying State Delay
   Authors: Jie Qi, Jiaqi Hu, Jing Zhang, Miroslav Krstic
   Abstract: A transport PDE with a spatial integral and recirculation with constant delay has been a benchmark for neural operator approximations of PDE backstepping controllers. Introducing a spatially-varying delay into the model gives rise to a gain operator defined through integral equations which the operator's input, the varying delay function, enters in previously unencountered manners, including in the limits of integration and as the inverse of the "delayED time" function. This, in turn, introduces novel mathematical challenges in estimating the operator's Lipschitz constant. The backstepping kernel function having two branches endows the feedback law with a two-branch structure, where only one of the two feedback branches depends on both of the kernel branches. For this rich feedback structure, we propose a neural operator approximation of such a two-branch feedback law and prove the approximator to be semiglobally practically stabilizing. With numerical results we illustrate the training of the neural operator and its stabilizing capability.
   Submitted 14 December, 2024; v1 submitted 11 December, 2024; originally announced December 2024.
   Comments: 14 pages, 1 table, 20 figures

5. arXiv:2410.18610 [pdf, other]  (eess.IV, cs.CV)
   A Joint Representation Using Continuous and Discrete Features for Cardiovascular Diseases Risk Prediction on Chest CT Scans
   Authors: Minfeng Xu, Chen-Chen Fan, Yan-Jie Zhou, Wenchao Guo, Pan Liu, Jing Qi, Le Lu, Hanqing Chao, Kunlun He
   Abstract: Cardiovascular diseases (CVD) remain a leading health concern and contribute significantly to global mortality rates. While clinical advancements have led to a decline in CVD mortality, accurately identifying individuals who could benefit from preventive interventions remains an unsolved challenge in preventive cardiology. Current CVD risk prediction models, recommended by guidelines, are based on limited traditional risk factors or use CT imaging to acquire quantitative biomarkers, and still have limitations in predictive accuracy and applicability. On the other hand, end-to-end trained CVD risk prediction methods leveraging deep learning on CT images often fail to provide transparent and explainable decision grounds for assisting physicians. In this work, we propose a novel joint representation that integrates discrete quantitative biomarkers and continuous deep features extracted from chest CT scans. Our approach starts with a deep CVD risk classification model that captures comprehensive continuous deep learning features while jointly obtaining clinically established quantitative biomarkers via segmentation models. In the feature joint representation stage, we use an instance-wise feature-gated mechanism to align the continuous and discrete features, followed by a soft instance-wise feature interaction mechanism fostering independent and effective feature interaction for the final CVD risk prediction. Our method substantially improves CVD risk predictive performance and offers individual contribution analysis of each biomarker, which is important in assisting physicians' decision-making processes. We validated our method on a public chest low-dose CT dataset and a private external chest standard-dose CT patient cohort of 17,207 CT volumes from 6,393 unique subjects, and demonstrated superior predictive performance, achieving AUCs of 0.875 and 0.843, respectively.
   Submitted 15 November, 2024; v1 submitted 24 October, 2024; originally announced October 2024.
   Comments: 23 pages, 9 figures

6. arXiv:2410.06115 [pdf, other]  (cs.IT, eess.SP)
   A physics-based perspective for understanding and utilizing spatial resources of wireless channels
   Authors: Hui Xu, Jun Wei Wu, Zhen Jie Qi, Hao Tian Wu, Rui Wen Shao, Qiang Cheng, Jieao Zhu, Linglong Dai, Tie Jun Cui
   Abstract: To satisfy the increasing demands for transmission rates of wireless communications, it is necessary to use the spatial resources of electromagnetic (EM) waves. In this context, EM information theory (EIT) has become a hot topic by integrating the theoretical framework of deterministic mathematics and stochastic statistics to explore the transmission mechanisms of continuous EM waves. However, previous studies have primarily focused on frame analysis, with limited exploration of practical applications and a comprehensive understanding of its essential physical characteristics. In this paper, we present a three-dimensional (3-D) line-of-sight channel capacity formula that captures the vector EM physics and accommodates both near- and far-field scenes. Based on the rigorous mathematical equation and the physical mechanism of fast multipole expansion, a channel model is established, and the finite angular spectral bandwidth feature of scattered waves is revealed. To adapt to this feature of the channel, an optimization problem is formulated for determining the mode currents on the transmitter, aiming to obtain the optimal design of the precoder and combiner. We make comprehensive analyses to investigate the relationship among the spatial degrees of freedom, noise, and transmitted power, thereby establishing a rigorous upper bound on channel capacity. A series of simulations are conducted to validate the theoretical model and numerical method. This work offers a novel perspective and methodology for understanding and leveraging EIT, and provides a theoretical foundation for the design and optimization of future wireless communications.
   Submitted 8 October, 2024; originally announced October 2024.
   Comments: 31 pages, 8 figures

7. arXiv:2409.15596 [pdf, other]  (eess.SP)
   Computational Ghost Imaging with Low-Density Parity-Check Code
   Authors: Shuang Liu, Yunkai Hu, Jinquan Qi, Shensheng Han, Zihuai Lin
   Abstract: Ghost imaging (GI) is a high-resolution imaging technology that has been a subject of interest in many fields over the past 20 years. Most GI research focuses on reconstruction from under-sampled signals; how to use information redundancy to improve confidence in the result in a complex environment has hardly been studied. Motivated by this, we propose a computational GI system based on a low-density parity-check (LDPC) coded radiation field that exploits the signal redundancy. The non-ideal factors generated within the imaging process can be eliminated by setting up a matching fading channel model. We have derived an analytical lower bound on the bit error rate for the proposed LDPC-coded GI system. The effectiveness and performance of the LDPC-coded GI system are further validated through numerical and experimental results.
   Submitted 23 September, 2024; originally announced September 2024.

8. arXiv:2311.03557 [pdf, other]  (cs.LG, cs.CV, eess.IV)
   Spatio-Temporal Similarity Measure based Multi-Task Learning for Predicting Alzheimer's Disease Progression using MRI Data
   Authors: Xulong Wang, Yu Zhang, Menghui Zhou, Tong Liu, Jun Qi, Po Yang
   Abstract: Identifying and utilising various biomarkers for tracking Alzheimer's disease (AD) progression has received much recent attention and helps clinicians make prompt decisions. Traditional progression models focus on extracting morphological biomarkers in regions of interest (ROIs) from MRI/PET images, such as regional average cortical thickness and regional volume. They are effective but ignore the relationships between brain ROIs over time, which would lead to synergistic deterioration. To explore the synergistic deteriorating relationship between these biomarkers, in this paper we propose a novel spatio-temporal similarity measure based multi-task learning approach for effectively predicting AD progression and sensitively capturing the critical relationships between biomarkers. Specifically, we first define a temporal measure for estimating the magnitude and velocity of biomarker change over time, which indicates a changing trend (temporal). Converting this trend into a vector, we then compare this variability between biomarkers in a unified vector space (spatial). The experimental results show that, compared with directly ROI based learning, our proposed method is more effective in predicting disease progression. Our method also enables longitudinal stability selection to identify the changing relationships between biomarkers, which play a key role in disease progression. We prove that the synergistic deteriorating biomarkers between cortical volumes or surface areas have a significant effect on cognitive prediction.
   Submitted 6 November, 2023; originally announced November 2023.
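
   A toy sketch of the general idea of encoding each biomarker's longitudinal change as a (magnitude, velocity) vector and comparing biomarkers in that vector space follows; the synthetic data, the two-component encoding, and the use of cosine similarity are illustrative assumptions, not the paper's actual measure:

```python
# Toy illustration of comparing biomarkers by their change-over-time vectors,
# in the spirit of arXiv:2311.03557; the data, the (magnitude, velocity) encoding,
# and cosine similarity are assumptions made only for illustration.
import numpy as np

rng = np.random.default_rng(0)
timepoints = np.array([0.0, 0.5, 1.0, 1.5, 2.0])                # years from baseline
# rows: biomarkers (e.g. regional cortical thickness or volume), columns: visits
biomarkers = np.cumsum(rng.normal(-0.05, 0.02, size=(4, len(timepoints))), axis=1)

def trend_vector(series, t):
    """Encode a longitudinal series as (total change, mean rate of change)."""
    magnitude = series[-1] - series[0]
    velocity = np.mean(np.diff(series) / np.diff(t))
    return np.array([magnitude, velocity])

vecs = np.stack([trend_vector(b, timepoints) for b in biomarkers])
unit = vecs / np.linalg.norm(vecs, axis=1, keepdims=True)
similarity = unit @ unit.T                                       # cosine similarity matrix
print(np.round(similarity, 2))   # large positive entries suggest biomarkers declining together
```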

9. arXiv:2307.11436 [pdf, other]  (math.OC, cs.LG, eess.SY, math.AP)
   doi: 10.1016/j.sysconle.2024.105714
   Neural Operators for PDE Backstepping Control of First-Order Hyperbolic PIDE with Recycle and Delay
   Authors: Jie Qi, Jing Zhang, Miroslav Krstic
   Abstract: The recently introduced DeepONet operator-learning framework for PDE control is extended from the results for basic hyperbolic and parabolic PDEs to an advanced hyperbolic class that involves delays on both the state and the system output or input. The PDE backstepping design produces gain functions that are outputs of a nonlinear operator, mapping functions on a spatial domain into functions on a spatial domain, and where this gain-generating operator's inputs are the PDE's coefficients. The operator is approximated with a DeepONet neural network to a degree of accuracy that is provably arbitrarily tight. Once we produce this approximation-theoretic result in infinite dimension, with it we establish stability in closed loop under feedback that employs approximate gains. In addition to supplying such results under full-state feedback, we also develop DeepONet-approximated observers and output-feedback laws and prove their own stabilizing properties under neural operator approximations. With numerical simulations we illustrate the theoretical results and quantify the numerical effort savings, which are of two orders of magnitude, thanks to replacing the numerical PDE solving with the DeepONet.
   Submitted 14 June, 2024; v1 submitted 21 July, 2023; originally announced July 2023.
   Comments: 20 pages
   Journal ref: Systems & Control Letters, 2024

10. arXiv:2307.11424 [pdf, ps, other]  (math.OC, eess.SY, math.AP, physics.class-ph, physics.flu-dyn)
    Robust stabilization of $2 \times 2$ first-order hyperbolic PDEs with uncertain input delay
    Authors: Jing Zhang, Jie Qi
    Abstract: A backstepping-based compensator design is developed for a system of $2 \times 2$ first-order linear hyperbolic partial differential equations (PDEs) in the presence of an uncertain long input delay at the boundary. We introduce a transport PDE to represent the delayed input, which leads to three coupled first-order hyperbolic PDEs. A novel backstepping transformation, composed of two Volterra transformations and an affine Volterra transformation, is introduced for the predictive control design. The resulting kernel equations from the affine Volterra transformation are two coupled first-order PDEs, each with two boundary conditions, which brings challenges to the well-posedness analysis. We solve this challenge by using the method of characteristics and successive approximation. To analyze the sensitivity of the closed-loop system to the uncertain input delay, we introduce a neutral system which captures the control effect resulting from the delay uncertainty. It is proved that the proposed control is robust to small delay variations. Numerical examples illustrate the performance of the proposed compensator.
    Submitted 21 July, 2023; originally announced July 2023.

11. arXiv:2307.04212 [pdf, other]  (math.AP, eess.SY, math.OC, physics.class-ph, physics.flu-dyn)
    Delay-Adaptive Control of First-order Hyperbolic PIDEs
    Authors: Shanshan Wang, Jie Qi, Miroslav Krstic
    Abstract: We develop a delay-adaptive controller for a class of first-order hyperbolic partial integro-differential equations (PIDEs) with an unknown input delay. By employing a transport PDE to represent delayed actuator states, the system is transformed into a transport partial differential equation (PDE) with unknown propagation speed cascaded with a PIDE. A parameter update law is designed using a Lyapunov argument and the infinite-dimensional backstepping technique to establish global stability results. Furthermore, the well-posedness of the closed-loop system is analyzed. Finally, the effectiveness of the proposed method is validated through numerical simulations.
    Submitted 9 July, 2023; originally announced July 2023.
arXiv:2307.03727 [pdf, ps, other] math.OC, eess.SY, math.AP, physics.class-ph, physics.flu-dyn
Bilateral boundary control of an input delayed 2-D reaction-diffusion equation
Authors: Dandan Guan, Yanmei Chen, Jie Qi, Linglong Du
Abstract: In this paper, a delay compensation design method based on PDE backstepping is developed for a two-dimensional reaction-diffusion partial differential equation (PDE) with bilateral input delays. The PDE is defined in a rectangular domain, and the bilateral control is imposed on a pair of opposite sides of the rectangle. To represent the delayed bilateral inputs, we introduce two 2-D transport PDEs that form a cascade system with the original PDE. A novel set of backstepping transformations is proposed for the delay compensator design, including one Volterra integral transformation and two affine Volterra integral transformations. Unlike the kernel equations for 1-D PDE systems with delayed boundary input, the resulting kernel equations for the 2-D system have singular initial conditions governed by the Dirac delta function. Consequently, the kernel solutions are written as a double trigonometric series with singularities. To address the challenge that these singularities pose for the stability analysis, we prove a set of inequalities by using the Cauchy-Schwarz inequality, the 2-D Fourier series, and Parseval's theorem. A numerical simulation illustrates the effectiveness of the proposed delay-compensation method.
Submitted 7 July, 2023; originally announced July 2023.
Comments: 11 pages, 3 figures (including 8 sub-figures)

arXiv:2306.07090 [pdf, other] eess.AS, cs.SD, q-bio.QM
Parameter-efficient Dysarthric Speech Recognition Using Adapter Fusion and Householder Transformation
Authors: Jinzi Qi, Hugo Van hamme
Abstract: In dysarthric speech recognition, data scarcity and the vast diversity between dysarthric speakers pose significant challenges. While finetuning has been a popular solution, it can lead to overfitting and low parameter efficiency. Adapter modules offer a better solution, with their small size and easy applicability. Additionally, Adapter Fusion can facilitate knowledge transfer from multiple learned adapters, but may employ more parameters. In this work, we apply Adapter Fusion for target-speaker adaptation and speech recognition, achieving acceptable accuracy with significantly fewer speaker-specific trainable parameters than classical finetuning methods. We further improve the parameter efficiency of the fusion layer by reducing the size of the query and key layers and by using the Householder transformation to reparameterize the value linear layer. Our proposed fusion layer achieves recognition results comparable to the original method with only one third of the parameters.
Submitted 12 June, 2023; originally announced June 2023.
Comments: Accepted by Interspeech 2023
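The Householder reparameterization of the value layer mentioned above can be illustrated with a short sketch; the module below is a generic illustration under assumed shapes and hyperparameters, not the authors' implementation. A product of k Householder reflections represents a d-by-d map with only k*d parameters instead of d*d.

    import torch
    import torch.nn as nn

    class HouseholderLinear(nn.Module):
        """d x d map parameterized as a product of Householder reflections
        H_i = I - 2 v_i v_i^T / ||v_i||^2, costing k*d parameters. Sketch only."""
        def __init__(self, dim: int, num_reflections: int = 4):
            super().__init__()
            self.vs = nn.Parameter(torch.randn(num_reflections, dim))

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            # x: (batch, dim); apply each reflection in turn.
            for v in self.vs:
                v = v / v.norm()
                x = x - 2.0 * (x @ v).unsqueeze(-1) * v
            return x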
arXiv:2305.12838 [pdf, other] eess.AS, cs.SD
An Enhanced Res2Net with Local and Global Feature Fusion for Speaker Verification
Authors: Yafeng Chen, Siqi Zheng, Hui Wang, Luyao Cheng, Qian Chen, Jiajun Qi
Abstract: Effective fusion of multi-scale features is crucial for improving speaker verification performance, yet most existing methods aggregate multi-scale features in a layer-wise manner via simple operations such as summation or concatenation. This paper proposes a novel architecture called Enhanced Res2Net (ERes2Net), which incorporates both local and global feature fusion techniques to improve performance. The local feature fusion (LFF) fuses the features within a single residual block to extract the local signal. The global feature fusion (GFF) takes acoustic features of different scales as input to aggregate the global signal. To facilitate effective feature fusion in both LFF and GFF, an attentional feature fusion module is employed in the ERes2Net architecture, replacing summation or concatenation operations. Experiments conducted on the VoxCeleb datasets demonstrate the superiority of ERes2Net in speaker verification. Code has been made publicly available at https://github.com/alibaba-damo-academy/3D-Speaker.
Submitted 3 August, 2023; v1 submitted 22 May, 2023; originally announced May 2023.
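One generic way to realize attentional fusion of two equally shaped feature maps, as opposed to plain summation or concatenation, is a learned element-wise gate; the sketch below is an assumption-laden illustration, not the ERes2Net module itself.

    import torch
    import torch.nn as nn

    class AttentiveFusion(nn.Module):
        """Fuse two feature maps with a learned element-wise gate. Sketch only."""
        def __init__(self, channels: int):
            super().__init__()
            self.gate = nn.Sequential(
                nn.Conv1d(2 * channels, channels, kernel_size=1),
                nn.Sigmoid(),
            )

        def forward(self, a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
            # a, b: (batch, channels, time)
            w = self.gate(torch.cat([a, b], dim=1))  # gate values in [0, 1]
            return w * a + (1.0 - w) * b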
arXiv:2305.00127 [pdf, other] cs.LG, cs.AI, eess.SY; doi: 10.1109/JIOT.2023.3267625
Optimal Scheduling in IoT-Driven Smart Isolated Microgrids Based on Deep Reinforcement Learning
Authors: Jiaju Qi, Lei Lei, Kan Zheng, Simon X. Yang, Xuemin Shen
Abstract: In this paper, we investigate the scheduling of diesel generators (DGs) in an Internet of Things (IoT)-driven isolated microgrid (MG) by deep reinforcement learning (DRL). Renewable energy is fully exploited under the uncertainty of renewable generation and load demand. The DRL agent learns an optimal policy from historical renewable and load data of previous days, and the policy generates real-time decisions based on observations of past renewable and load data of previous hours collected by connected sensors. The goal is to reduce the operating cost while ensuring the supply-demand balance. Specifically, a novel finite-horizon partially observable Markov decision process (POMDP) model is conceived that accounts for the spinning reserve. To overcome the challenge of the discrete-continuous hybrid action space, which arises from the binary DG switching decision and the continuous energy dispatch (ED) decision, a DRL algorithm, namely the hybrid-action finite-horizon RDPG (HAFH-RDPG), is proposed. HAFH-RDPG seamlessly integrates two classical DRL algorithms, i.e., deep Q-network (DQN) and recurrent deterministic policy gradient (RDPG), based on a finite-horizon dynamic programming (DP) framework. Extensive experiments are performed with real-world data in an IoT-driven MG to evaluate the capability of the proposed algorithm in handling the uncertainty due to inter-hour and inter-day power fluctuation and to compare its performance with those of the benchmark algorithms.
Submitted 28 April, 2023; originally announced May 2023.
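The discrete-continuous hybrid action described above (a binary on/off pattern for the DGs plus a continuous dispatch vector) can be sketched as a two-stage selection; the function and its arguments are illustrative assumptions, not the HAFH-RDPG implementation.

    import numpy as np

    def select_hybrid_action(q_values, dispatch_policy, observation):
        """Pick a discrete switching decision from Q-values, then a continuous
        dispatch vector from a policy conditioned on that choice. Sketch only."""
        k = int(np.argmax(q_values(observation)))            # discrete DG on/off pattern
        dispatch = np.clip(dispatch_policy(observation, k),  # continuous energy dispatch
                           0.0, 1.0)
        return k, dispatch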
arXiv:2302.00676 [pdf] physics.optics, eess.SY, physics.app-ph
Enhancing Light Extraction of Organic Light Emitting Diodes by Deep-Groove High-index Dielectric Nanomesh Using Large-area Nanoimprint
Authors: Ji Qi, Wei Ding, Qi Zhang, Yuxuan Wang, Hao Chen, Stephen Y. Chou
Abstract: To solve the conventional conflict between maintaining good charge transport properties and achieving high light extraction efficiency when using micro/nanostructure-patterned substrates to extract light from organic light emitting diodes (OLEDs), we developed a novel OLED structure, termed the High-index Deep-Groove Dielectric Nanomesh OLED (HDNM-OLED), fabricated by large-area nanoimprint lithography (NIL). The key component is a nanostructure-patterned substrate embedded with a high-index deep-groove nanomesh and capped with a low-index planarization layer. The high-index, deep-groove nanomesh efficiently releases the trapped photons to achieve significantly enhanced light extraction, and the planarization layer helps to maintain the good charge transport properties of the organic active layers deposited on top of it. For a green phosphorescent OLED in our demonstration, the HDNM-OLED structure, compared to a planar conventional ITO-OLED structure, enhanced the external quantum efficiency (EQE) by 1.85-fold from 26% to 48% and the power efficiency by 1.86-fold from 42 lm/W to 79 lm/W.
Besides green OLEDs, the HDNM-OLED structure was also shown to work for red- and blue-emitting OLEDs, achieving enhanced light extraction efficiency (1.58-fold for red light, 1.86-fold for blue light) without further structural modification, which demonstrates that the light-extraction enhancement of the HDNM-OLED is broadband.
Submitted 31 January, 2023; originally announced February 2023.
Comments: arXiv admin note: text overlap with arXiv:2302.00044

arXiv:2211.13939 [pdf, other] cs.SD, cs.LG, eess.AS
Efficient Incremental Text-to-Speech on GPUs
Authors: Muyang Du, Chuan Liu, Jiaxing Qi, Junjie Lai
Abstract: Incremental text-to-speech, also known as streaming TTS, has been increasingly applied to online speech applications that require ultra-low response latency to provide an optimal user experience. However, most of the existing speech synthesis pipelines deployed on GPUs are still non-incremental, which exposes limitations in high-concurrency scenarios, especially when the pipeline is built with end-to-end neural network models.
To address this issue, we present a highly efficient approach to perform real-time incremental TTS on GPUs with Instant Request Pooling and Module-wise Dynamic Batching. Experimental results demonstrate that the proposed method is capable of producing high-quality speech with a first-chunk latency lower than 80 ms under 100 QPS on a single NVIDIA A10 GPU, and that it significantly outperforms its non-incremental counterpart in both concurrency and latency. Our work demonstrates the effectiveness of high-performance incremental TTS on GPUs.
Submitted 5 December, 2022; v1 submitted 25 November, 2022; originally announced November 2022.
Comments: 5 pages, 4 figures
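Dynamic batching of whatever requests are currently queued, applied per pipeline module, can be sketched in a few lines; this is a generic illustration of the idea, with names and request structure assumed rather than taken from the paper's system.

    import queue

    def run_module_with_dynamic_batch(request_queue, module, max_batch=16):
        """Drain the requests currently waiting (up to max_batch), run them
        through one pipeline module as a single batch, and return the batch
        outputs. Generic sketch only."""
        batch = []
        while len(batch) < max_batch:
            try:
                batch.append(request_queue.get_nowait())
            except queue.Empty:
                break
        if not batch:
            return []
        return module([request["input"] for request in batch])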
arXiv:2210.13144 [pdf, other] eess.AS, cs.SD
Weak-Supervised Dysarthria-invariant Features for Spoken Language Understanding using an FHVAE and Adversarial Training
Authors: Jinzi Qi, Hugo Van hamme
Abstract: The scarcity of training data and the large speaker variation in dysarthric speech lead to poor accuracy and poor speaker generalization of spoken language understanding systems for dysarthric speech. Through work on the speech features, we focus on improving the model generalization ability with limited dysarthric data. Factorized Hierarchical Variational Auto-Encoders (FHVAE), trained without supervision, have shown their advantage in disentangling content and speaker representations, and earlier work showed that dysarthria is present in both feature vectors. Here, we add adversarial training to bridge the gap between the control and dysarthric speech data domains, and we extract dysarthria- and speaker-invariant features using weak supervision. The extracted features are evaluated on a spoken language understanding task and yield higher accuracy on unseen speakers with more severe dysarthria compared to features from the basic FHVAE model or plain filterbanks.
Submitted 24 October, 2022; originally announced October 2022.
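One common way to implement the adversarial domain training mentioned above is a gradient-reversal layer feeding a domain classifier, so the encoder is pushed toward features that do not reveal whether speech is dysarthric or control; the snippet below is that generic construction, not necessarily the authors' setup.

    import torch

    class GradReverse(torch.autograd.Function):
        """Identity in the forward pass; flips the gradient sign in the backward
        pass, so the feature extractor learns to fool the domain classifier."""
        @staticmethod
        def forward(ctx, x, lam=1.0):
            ctx.lam = lam
            return x.view_as(x)

        @staticmethod
        def backward(ctx, grad_output):
            return -ctx.lam * grad_output, None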
arXiv:2210.06382 [pdf, other] eess.AS, cs.AI, cs.LG, cs.SD, eess.SP
An Ensemble Teacher-Student Learning Approach with Poisson Sub-sampling to Differential Privacy Preserving Speech Recognition
Authors: Chao-Han Huck Yang, Jun Qi, Sabato Marco Siniscalchi, Chin-Hui Lee
Abstract: We propose an ensemble learning framework with Poisson sub-sampling to effectively train a collection of teacher models that provide a differential privacy (DP) guarantee for the training data. Through boosting under DP, a student model derived from the training data suffers little degradation compared with models trained with no privacy protection. Our proposed solution leverages two mechanisms: (i) a privacy-budget amplification via Poisson sub-sampling to train a target prediction model that requires less noise to achieve the same level of privacy budget, and (ii) a combination of the sub-sampling technique and an ensemble teacher-student learning framework that introduces DP-preserving noise at the output of the teacher models and transfers DP-preserving properties via noisy labels. Privacy-preserving student models are then trained with the noisy labels to learn the knowledge with DP protection from the teacher model ensemble. Experimental evidence on spoken command recognition and continuous speech recognition of Mandarin speech shows that our proposed framework greatly outperforms existing DP-preserving algorithms in both speech processing tasks.
Submitted 12 October, 2022; originally announced October 2022.
Comments: Accepted to ISCA, ISCSLP 2022, Singapore. 5 pages
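The two mechanisms above can be illustrated schematically with Poisson sub-sampling of the training set and a PATE-style noisy aggregation of teacher votes; this sketch reflects the general recipe, not the authors' exact noise calibration.

    import numpy as np

    rng = np.random.default_rng(0)

    def poisson_subsample(dataset, rate=0.1):
        """Include each record independently with probability `rate`,
        the sub-sampling step that amplifies the privacy budget."""
        mask = rng.random(len(dataset)) < rate
        return [x for x, keep in zip(dataset, mask) if keep]

    def noisy_teacher_label(teacher_votes, num_classes, sigma=2.0):
        """Aggregate teacher predictions into a released label by adding
        Gaussian noise to the vote counts before taking the argmax."""
        counts = np.bincount(teacher_votes, minlength=num_classes).astype(float)
        counts += rng.normal(scale=sigma, size=num_classes)
        return int(np.argmax(counts))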
arXiv:2205.12459 [pdf, other] cs.CV, eess.IV; doi: 10.1049/ipr2.12733
A CNN with Noise Inclined Module and Denoise Framework for Hyperspectral Image Classification
Authors: Zhiqiang Gong, Ping Zhong, Jiahao Qi, Panhe Hu
Abstract: Deep neural networks have been successfully applied to hyperspectral image classification. However, most prior works adopt general deep architectures while ignoring the intrinsic structure of the hyperspectral image, such as the physical noise generation, which leaves these deep models unable to generate discriminative features and to provide impressive classification performance. To leverage such intrinsic information, this work develops a novel deep learning framework with a noise inclined module and a denoise framework for hyperspectral image classification. First, we model the spectral signature of the hyperspectral image with a physical noise model to describe the high intraclass variance of each class and the large overlap between different classes in the image. Then, a noise inclined module is developed to capture the physical noise within each object, and a denoise framework follows to remove such noise from the object. Finally, a CNN with the noise inclined module and the denoise framework is developed to obtain discriminative features and provide good classification performance for hyperspectral images.
Experiments are conducted on two commonly used real-world datasets, and the results show the effectiveness of the proposed method. The implementation of the proposed method and the compared methods can be accessed at https://github.com/shendu-sw/noise-physical-framework.
Submitted 24 May, 2022; originally announced May 2022.
Journal ref: IET Image Processing, 2022

arXiv:2205.09987 [pdf, other] cs.RO, eess.SY
Model Predictive Manipulation of Compliant Objects with Multi-Objective Optimizer and Adversarial Network for Occlusion Compensation
Authors: Jiaming Qi, Dongyu Li, Yufeng Gao, Peng Zhou, David Navarro-Alarcon
Abstract: The robotic manipulation of compliant objects is currently one of the most active problems in robotics due to its potential to automate many important applications. Despite the progress achieved by the robotics community in recent years, the 3D shaping of these types of materials remains an open research problem. In this paper, we propose a new vision-based controller to automatically regulate the shape of compliant objects with robotic arms.
Our method uses an efficient online surface/curve fitting algorithm that quantifies the object's geometry with a compact vector of features; this feedback-like vector makes it possible to establish an explicit shape servo-loop. To coordinate the motion of the robot with the computed shape features, we propose a receding-time estimator that approximates the system's sensorimotor model while satisfying various performance criteria. A deep adversarial network is developed to robustly compensate for visual occlusions in the camera's field of view, which makes it possible to guide the shaping task even with partial observations of the object. Model predictive control is utilized to compute the robot's shaping motions subject to workspace and saturation constraints. A detailed experimental study is presented to validate the effectiveness of the proposed control framework.
Submitted 20 May, 2022; originally announced May 2022.
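The compact shape-feature vector obtained by online curve fitting can be illustrated with a simple least-squares fit; the polynomial parameterization below is an assumption chosen for illustration rather than the representation used in the paper.

    import numpy as np

    def shape_features(points_2d, degree=5):
        """Fit x(s) and y(s) of a sampled contour with low-order polynomials in
        the normalized arc-length s and stack the coefficients into a compact
        feature vector usable by a shape servo-loop."""
        s = np.linspace(0.0, 1.0, len(points_2d))
        cx = np.polyfit(s, points_2d[:, 0], degree)
        cy = np.polyfit(s, points_2d[:, 1], degree)
        return np.concatenate([cx, cy])  # 2 * (degree + 1) features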
arXiv:2203.07659 [pdf] eess.IV, cs.CV
Breast Cancer Molecular Subtypes Prediction on Pathological Images with Discriminative Patch Selecting and Multi-Instance Learning
Authors: Hong Liu, Wen-Dong Xu, Zi-Hao Shang, Xiang-Dong Wang, Hai-Yan Zhou, Ke-Wen Ma, Huan Zhou, Jia-Lin Qi, Jia-Rui Jiang, Li-Lan Tan, Hui-Min Zeng, Hui-Juan Cai, Kuan-Song Wang, Yue-Liang Qian
Abstract: Molecular subtypes of breast cancer are important references for personalized clinical treatment. For cost and labor savings, only one of the patient's paraffin blocks is usually selected for subsequent immunohistochemistry (IHC) to obtain molecular subtypes. The inevitable sampling error is risky due to tumor heterogeneity and could result in a delay in treatment. Molecular subtype prediction from conventional H&E pathological whole slide images (WSI) using AI methods is useful and critical to help pathologists pre-screen the proper paraffin block for IHC. It is a challenging task since only WSI-level labels of molecular subtypes can be obtained from IHC. Gigapixel WSIs are divided into a huge number of patches to be computationally feasible for deep learning, but with such coarse slide-level labels, patch-based methods may suffer from abundant noise patches, such as folds, overstained regions, or non-tumor tissues. A weakly supervised learning framework based on discriminative patch selection and multi-instance learning is proposed for breast cancer molecular subtype prediction from H&E WSIs. First, a co-teaching strategy is adopted to learn molecular subtype representations and filter out noise patches. Then, a balanced sampling strategy is used to handle the subtype imbalance in the dataset. In addition, a noise-patch filtering algorithm that uses the local outlier factor based on cluster centers is proposed to further select discriminative patches. Finally, a loss function integrating patch-level with slide-level constraint information is used to fine-tune the MIL framework on the obtained discriminative patches and further improve the performance of molecular subtyping. The experimental results confirm the effectiveness of the proposed method, and our models outperformed even senior pathologists, showing potential to assist pathologists in pre-screening paraffin blocks for IHC in the clinic.
Submitted 15 March, 2022; originally announced March 2022.
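The local-outlier-factor filtering step described above can be sketched with scikit-learn; the inputs (patch embeddings) and the neighborhood size are illustrative assumptions, and the paper's variant additionally works from cluster centers.

    from sklearn.neighbors import LocalOutlierFactor

    def filter_noise_patches(patch_features, n_neighbors=20):
        """Drop patch embeddings flagged as outliers by the local outlier
        factor; the retained patches are treated as discriminative instances."""
        lof = LocalOutlierFactor(n_neighbors=n_neighbors)
        labels = lof.fit_predict(patch_features)  # +1 inlier, -1 outlier
        return patch_features[labels == 1]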
arXiv:2203.06031 [pdf, other] cs.LG, cs.AI, cs.SD, eess.AS
Exploiting Low-Rank Tensor-Train Deep Neural Networks Based on Riemannian Gradient Descent With Illustrations of Speech Processing
Authors: Jun Qi, Chao-Han Huck Yang, Pin-Yu Chen, Javier Tejedor
Abstract: This work focuses on designing low-complexity hybrid tensor networks by considering trade-offs between model complexity and practical performance. First, we exploit a low-rank tensor-train deep neural network (TT-DNN) to build an end-to-end deep learning pipeline, namely LR-TT-DNN. Second, a hybrid model combining the LR-TT-DNN with a convolutional neural network (CNN), denoted as CNN+(LR-TT-DNN), is set up to boost the performance. Instead of randomly assigning large TT-ranks to the TT-DNN, we leverage Riemannian gradient descent to determine a TT-DNN associated with small TT-ranks. Furthermore, CNN+(LR-TT-DNN) consists of convolutional layers at the bottom for feature extraction and several TT layers at the top to solve regression and classification problems. We separately assess the LR-TT-DNN and CNN+(LR-TT-DNN) models on speech enhancement and spoken command recognition tasks. Our empirical evidence demonstrates that the LR-TT-DNN and CNN+(LR-TT-DNN) models with fewer model parameters can outperform the TT-DNN and CNN+(TT-DNN) counterparts.
Submitted 11 March, 2022; originally announced March 2022.
Comments: 10 pages, 10 figures
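The parameter savings from replacing a fully connected layer with a tensor-train (TT) layer can be made concrete with a small counting example; the mode shapes and TT-ranks below are chosen for illustration and are not taken from the paper.

    def tt_layer_params(in_modes, out_modes, ranks):
        """Parameter count of a TT layer whose weight matrix is reshaped into a
        tensor with factorized input/output modes; core k has shape
        (r_{k-1}, m_k, n_k, r_k)."""
        total = 0
        for k, (m, n) in enumerate(zip(in_modes, out_modes)):
            total += ranks[k] * m * n * ranks[k + 1]
        return total

    # A 1024 x 1024 fully connected layer stores 1,048,576 weights; a TT layer
    # with modes (4, 8, 8, 4) on both sides and TT-ranks (1, 8, 8, 8, 1) stores
    # only 8,448.
    print(tt_layer_params((4, 8, 8, 4), (4, 8, 8, 4), (1, 8, 8, 8, 1)))  # -> 8448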
arXiv:2203.03550 [pdf, other] cs.CL, cs.AI, cs.DC, cs.NE, eess.AS
When BERT Meets Quantum Temporal Convolution Learning for Text Classification in Heterogeneous Computing
Authors: Chao-Han Huck Yang, Jun Qi, Samuel Yen-Chi Chen, Yu Tsao, Pin-Yu Chen
Abstract: The rapid development of quantum computing has demonstrated many unique characteristics of quantum advantages, such as richer feature representation and more secure protection of model parameters. This work proposes a vertical federated learning architecture based on variational quantum circuits to demonstrate the competitive performance of a quantum-enhanced pre-trained BERT model for text classification.
In particular, our proposed hybrid classical-quantum model consists of a novel random quantum temporal convolution (QTC) learning framework that replaces some layers in the BERT-based decoder. Our experiments on intent classification show that the proposed BERT-QTC model attains competitive results on the Snips and ATIS spoken language datasets. Specifically, BERT-QTC boosts the performance of the existing quantum circuit-based language model on the two text classification datasets by 1.57% and 1.52% relative improvement. Furthermore, BERT-QTC can be feasibly deployed both on existing commercially accessible quantum computation hardware and on a CPU-based interface for ensuring data isolation.
Submitted 17 February, 2022; originally announced March 2022.
Comments: Accepted to ICASSP 2022

arXiv:2202.06727 cs.LG, eess.SY
STG-GAN: A spatiotemporal graph generative adversarial networks for short-term passenger flow prediction in urban rail transit systems
Authors: Jinlei Zhang, Hua Li, Lixing Yang, Guangyin Jin, Jianguo Qi, Ziyou Gao
Abstract: Short-term passenger flow prediction is an important but challenging task for better managing urban rail transit (URT) systems. Some emerging deep learning models provide good insights for improving short-term prediction accuracy. However, there exist many complex spatiotemporal dependencies in URT systems. Most previous methods only consider the absolute error between ground truth and predictions as the optimization objective, which fails to account for spatial and temporal constraints on the predictions. Furthermore, a large number of existing prediction models introduce complex neural network layers to improve accuracy while ignoring their training efficiency and memory occupancy, which decreases their chances of being applied in the real world. To overcome these limitations, we propose a novel deep learning-based spatiotemporal graph generative adversarial network (STG-GAN) model with higher prediction accuracy, higher efficiency, and lower memory occupancy to predict short-term passenger flows of the URT network. Our model consists of two major parts, which are optimized in an adversarial learning manner: (1) a generator network including gated temporal convolutional networks (TCN) and weight-sharing graph convolution networks (GCN) to capture structural spatiotemporal dependencies and generate predictions with a relatively small computational burden; and (2) a discriminator network including a spatial discriminator and a temporal discriminator to enhance the spatial and temporal constraints of the predictions. The STG-GAN is evaluated on two large-scale real-world datasets from the Beijing Subway. A comparison with several state-of-the-art models illustrates its superiority and robustness. This study can provide critical experience in conducting short-term passenger flow predictions, especially from the perspective of real-world applications.
Submitted 16 August, 2023; v1 submitted 10 February, 2022; originally announced February 2022.
Comments: There are some errors that might mislead readers in this version; there is no new version right now.
ACM Class: E.0

arXiv:2201.10609 [pdf, other] cs.SD, cs.LG, eess.AS
Exploiting Hybrid Models of Tensor-Train Networks for Spoken Command Recognition
Authors: Jun Qi, Javier Tejedor
Abstract: This work aims to design a low-complexity spoken command recognition (SCR) system by considering different trade-offs between the number of model parameters and classification accuracy. More specifically, we exploit a deep hybrid architecture of a tensor-train (TT) network to build an end-to-end SCR pipeline. Our command recognition system, namely CNN+(TT-DNN), is composed of convolutional layers at the bottom for spectral feature extraction and TT layers at the top for command classification. Compared with a traditional end-to-end CNN baseline for SCR, our proposed CNN+(TT-DNN) model replaces the fully connected (FC) layers with TT ones, which substantially reduces the number of model parameters while maintaining the baseline performance of the CNN model. We initialize the CNN+(TT-DNN) model either in a randomized manner or based on a well-trained CNN+DNN, and assess the CNN+(TT-DNN) models on the Google Speech Command Dataset. Our experimental results show that the proposed CNN+(TT-DNN) model attains a competitive accuracy of 96.31% with 4 times fewer model parameters than the CNN model. Furthermore, the CNN+(TT-DNN) model can obtain a 97.2% accuracy when the number of parameters is increased.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.10609v1-abstract-full').style.display = 'none'; document.getElementById('2201.10609v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 January, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in Proc. ICASSP 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2201.01443">arXiv:2201.01443</a> <span> [<a href="https://arxiv.org/pdf/2201.01443">pdf</a>, <a href="https://arxiv.org/format/2201.01443">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Medical Physics">physics.med-ph</span> </div> </div> <p class="title is-5 mathjax"> Neural KEM: A Kernel Method with Deep Coefficient Prior for PET Image Reconstruction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+S">Siqi Li</a>, <a href="/search/eess?searchtype=author&query=Gong%2C+K">Kuang Gong</a>, <a href="/search/eess?searchtype=author&query=Badawi%2C+R+D">Ramsey D. Badawi</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+E+J">Edward J. Kim</a>, <a href="/search/eess?searchtype=author&query=Qi%2C+J">Jinyi Qi</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+G">Guobao Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2201.01443v2-abstract-short" style="display: inline;"> Image reconstruction of low-count positron emission tomography (PET) data is challenging. Kernel methods address the challenge by incorporating image prior information in the forward model of iterative PET image reconstruction. The kernelized expectation-maximization (KEM) algorithm has been developed and demonstrated to be effective and easy to implement. A common approach for a further improveme… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.01443v2-abstract-full').style.display = 'inline'; document.getElementById('2201.01443v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2201.01443v2-abstract-full" style="display: none;"> Image reconstruction of low-count positron emission tomography (PET) data is challenging. Kernel methods address the challenge by incorporating image prior information in the forward model of iterative PET image reconstruction. The kernelized expectation-maximization (KEM) algorithm has been developed and demonstrated to be effective and easy to implement. A common approach for a further improvement of the kernel method would be adding an explicit regularization, which however leads to a complex optimization problem. 
In this paper, we propose an implicit regularization for the kernel method by using a deep coefficient prior, which represents the kernel coefficient image in the PET forward model using a convolutional neural network. To solve the maximum-likelihood neural network-based reconstruction problem, we apply the principle of optimization transfer to derive a neural KEM algorithm. Each iteration of the algorithm consists of two separate steps: a KEM step for image update from the projection data and a deep-learning step in the image domain for updating the kernel coefficient image using the neural network. This optimization algorithm is guaranteed to monotonically increase the data likelihood. The results from computer simulations and real patient data have demonstrated that the neural KEM can outperform existing KEM and deep image prior methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.01443v2-abstract-full').style.display = 'none'; document.getElementById('2201.01443v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 January, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: text overlap with arXiv:2110.01174</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.09216">arXiv:2112.09216</a> <span> [<a href="https://arxiv.org/pdf/2112.09216">pdf</a>, <a href="https://arxiv.org/format/2112.09216">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> A Deep-Learning Framework for Improving COVID-19 CT Image Quality and Diagnostic Accuracy </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Goel%2C+G">Garvit Goel</a>, <a href="/search/eess?searchtype=author&query=Qi%2C+J">Jingyuan Qi</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+W">Wu-chun Feng</a>, <a href="/search/eess?searchtype=author&query=Cao%2C+G">Guohua Cao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.09216v1-abstract-short" style="display: inline;"> We present a deep-learning based computing framework for fast-and-accurate CT (DL-FACT) testing of COVID-19. Our CT-based DL framework was developed to improve the testing speed and accuracy of COVID-19 (plus its variants) via a DL-based approach for CT image enhancement and classification. The image enhancement network is adapted from DDnet, short for DenseNet and Deconvolution based network. 
To… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.09216v1-abstract-full').style.display = 'inline'; document.getElementById('2112.09216v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.09216v1-abstract-full" style="display: none;"> We present a deep-learning based computing framework for fast-and-accurate CT (DL-FACT) testing of COVID-19. Our CT-based DL framework was developed to improve the testing speed and accuracy of COVID-19 (plus its variants) via a DL-based approach for CT image enhancement and classification. The image enhancement network is adapted from DDnet, short for DenseNet and Deconvolution based network. To demonstrate its speed and accuracy, we evaluated DL-FACT across several sources of COVID-19 CT images. Our results show that DL-FACT can significantly shorten the turnaround time from days to minutes and improve the COVID-19 testing accuracy up to 91%. DL-FACT could be used as a software tool for medical professionals in diagnosing and monitoring COVID-19. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.09216v1-abstract-full').style.display = 'none'; document.getElementById('2112.09216v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.01697">arXiv:2112.01697</a> <span> [<a href="https://arxiv.org/pdf/2112.01697">pdf</a>, <a href="https://arxiv.org/format/2112.01697">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> LMR-CBT: Learning Modality-fused Representations with CB-Transformer for Multimodal Emotion Recognition from Unaligned Multimodal Sequences </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Fu%2C+Z">Ziwang Fu</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+F">Feng Liu</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+H">Hanyang Wang</a>, <a href="/search/eess?searchtype=author&query=Shen%2C+S">Siyuan Shen</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+J">Jiahao Zhang</a>, <a href="/search/eess?searchtype=author&query=Qi%2C+J">Jiayin Qi</a>, <a href="/search/eess?searchtype=author&query=Fu%2C+X">Xiangling Fu</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+A">Aimin Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.01697v1-abstract-short" style="display: inline;"> Learning modality-fused representations and processing unaligned multimodal sequences are meaningful and challenging in multimodal emotion recognition. Existing approaches use directional pairwise attention or a message hub to fuse language, visual, and audio modalities. However, those approaches introduce information redundancy when fusing features and are inefficient without considering the comp… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.01697v1-abstract-full').style.display = 'inline'; document.getElementById('2112.01697v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.01697v1-abstract-full" style="display: none;"> Learning modality-fused representations and processing unaligned multimodal sequences are meaningful and challenging in multimodal emotion recognition. Existing approaches use directional pairwise attention or a message hub to fuse language, visual, and audio modalities. However, those approaches introduce information redundancy when fusing features and are inefficient without considering the complementarity of modalities. In this paper, we propose an efficient neural network to learn modality-fused representations with CB-Transformer (LMR-CBT) for multimodal emotion recognition from unaligned multimodal sequences. Specifically, we first perform feature extraction for the three modalities respectively to obtain the local structure of the sequences. Then, we design a novel transformer with cross-modal blocks (CB-Transformer) that enables complementary learning of different modalities, mainly divided into local temporal learning,cross-modal feature fusion and global self-attention representations. In addition, we splice the fused features with the original features to classify the emotions of the sequences. Finally, we conduct word-aligned and unaligned experiments on three challenging datasets, IEMOCAP, CMU-MOSI, and CMU-MOSEI. The experimental results show the superiority and efficiency of our proposed method in both settings. Compared with the mainstream methods, our approach reaches the state-of-the-art with a minimum number of parameters. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.01697v1-abstract-full').style.display = 'none'; document.getElementById('2112.01697v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages ,Figure 2, Table 5</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2107.08651">arXiv:2107.08651</a> <span> [<a href="https://arxiv.org/pdf/2107.08651">pdf</a>, <a href="https://arxiv.org/format/2107.08651">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TAC.2022.3174032">10.1109/TAC.2022.3174032 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Delay-Compensated Distributed PDE Control of Traffic with Connected/Automated Vehicles </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Qi%2C+J">Jie Qi</a>, <a href="/search/eess?searchtype=author&query=Mo%2C+S">Shurong Mo</a>, <a href="/search/eess?searchtype=author&query=Krstic%2C+M">Miroslav Krstic</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2107.08651v3-abstract-short" style="display: inline;"> We develop an input delay-compensating design for stabilization of an Aw-Rascle-Zhang (ARZ) traffic model in congested regime which is governed by a $2\times 2$ first-order hyperbolic nonlinear PDE. The traffic flow consists of both adaptive cruise control-equipped (ACC-equipped) and manually-driven vehicles. The control input is the time gap of ACC-equipped and connected vehicles, which is subjec… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.08651v3-abstract-full').style.display = 'inline'; document.getElementById('2107.08651v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2107.08651v3-abstract-full" style="display: none;"> We develop an input delay-compensating design for stabilization of an Aw-Rascle-Zhang (ARZ) traffic model in congested regime which is governed by a $2\times 2$ first-order hyperbolic nonlinear PDE. The traffic flow consists of both adaptive cruise control-equipped (ACC-equipped) and manually-driven vehicles. The control input is the time gap of ACC-equipped and connected vehicles, which is subject to delays resulting from communication lag. For the linearized system, a novel three-branch bakcstepping transformation with explicit kernel functions is introduced to compensate the input delay. The transformation is proved 忙to be bounded, continuous and invertible, with explicit inverse transformation derived. Based on the transformation, we obtain the explicit predictor-feedback controller. We prove exponential stability of the closed-loop system with the delay compensator in $L_2$ norm. The performance improvement of the closed-loop system under the proposed controller is illustrated in simulation. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.08651v3-abstract-full').style.display = 'none'; document.getElementById('2107.08651v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.10359">arXiv:2106.10359</a> <span> [<a href="https://arxiv.org/pdf/2106.10359">pdf</a>, <a href="https://arxiv.org/format/2106.10359">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Medical Physics">physics.med-ph</span> </div> </div> <p class="title is-5 mathjax"> Direct Reconstruction of Linear Parametric Images from Dynamic PET Using Nonlocal Deep Image Prior </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Gong%2C+K">Kuang Gong</a>, <a href="/search/eess?searchtype=author&query=Catana%2C+C">Ciprian Catana</a>, <a href="/search/eess?searchtype=author&query=Qi%2C+J">Jinyi Qi</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Q">Quanzheng Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.10359v1-abstract-short" style="display: inline;"> Direct reconstruction methods have been developed to estimate parametric images directly from the measured PET sinograms by combining the PET imaging model and tracer kinetics in an integrated framework. Due to limited counts received, signal-to-noise-ratio (SNR) and resolution of parametric images produced by direct reconstruction frameworks are still limited. Recently supervised deep learning me… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.10359v1-abstract-full').style.display = 'inline'; document.getElementById('2106.10359v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.10359v1-abstract-full" style="display: none;"> Direct reconstruction methods have been developed to estimate parametric images directly from the measured PET sinograms by combining the PET imaging model and tracer kinetics in an integrated framework. Due to limited counts received, signal-to-noise-ratio (SNR) and resolution of parametric images produced by direct reconstruction frameworks are still limited. Recently supervised deep learning methods have been successfully applied to medical imaging denoising/reconstruction when large number of high-quality training labels are available. For static PET imaging, high-quality training labels can be acquired by extending the scanning time. However, this is not feasible for dynamic PET imaging, where the scanning time is already long enough. 
In this work, we proposed an unsupervised deep learning framework for direct parametric reconstruction from dynamic PET, which was tested on the Patlak model and the relative equilibrium Logan model. The patient's anatomical prior image, which is readily available from PET/CT or PET/MR scans, was supplied as the network input to provide a manifold constraint, and also utilized to construct a kernel layer to perform non-local feature denoising. The linear kinetic model was embedded in the network structure as a 1x1 convolution layer. The training objective function was based on the PET statistical model. Evaluations based on dynamic datasets of 18F-FDG and 11C-PiB tracers show that the proposed framework can outperform the traditional and the kernel method-based direct reconstruction methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.10359v1-abstract-full').style.display = 'none'; document.getElementById('2106.10359v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 10 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.07337">arXiv:2106.07337</a> <span> [<a href="https://arxiv.org/pdf/2106.07337">pdf</a>, <a href="https://arxiv.org/format/2106.07337">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Speech Disorder Classification Using Extended Factorized Hierarchical Variational Auto-encoders </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Qi%2C+J">Jinzi Qi</a>, <a href="/search/eess?searchtype=author&query=Van+hamme%2C+H">Hugo Van hamme</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.07337v1-abstract-short" style="display: inline;"> Objective speech disorder classification for speakers with communication difficulty is desirable for diagnosis and administering therapy. With the current state of speech technology, it is evident to propose neural networks for this application. But neural network model training is hampered by a lack of labeled disordered speech data. In this research, we apply an extended version of Factorized Hi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.07337v1-abstract-full').style.display = 'inline'; document.getElementById('2106.07337v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.07337v1-abstract-full" style="display: none;"> Objective speech disorder classification for speakers with communication difficulty is desirable for diagnosis and administering therapy. With the current state of speech technology, it is evident to propose neural networks for this application. But neural network model training is hampered by a lack of labeled disordered speech data. 
In this research, we apply an extended version of Factorized Hierarchical Variational Auto-encoders (FHVAE) for representation learning on disordered speech. The FHVAE model extracts both content-related and sequence-related latent variables from speech data, and we utilize the extracted variables to explore how disorder type information is represented in the latent variables. For better classification performance, the latent variables are aggregated at the word and sentence level. We show that an extension of the FHVAE model succeeds in the better disentanglement of the content-related and sequence-related representations, but both representations are still required for best results on disorder type classification. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.07337v1-abstract-full').style.display = 'none'; document.getElementById('2106.07337v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 2 figures, submitted to INTERSPEECH2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.02424">arXiv:2106.02424</a> <span> [<a href="https://arxiv.org/pdf/2106.02424">pdf</a>, <a href="https://arxiv.org/format/2106.02424">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Contour Moments Based Manipulation of Composite Rigid-Deformable Objects with Finite Time Model Estimation and Shape/Position Control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Qi%2C+J">Jiaming Qi</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+G">Guangfu Ma</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+J">Jihong Zhu</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+P">Peng Zhou</a>, <a href="/search/eess?searchtype=author&query=Lyu%2C+Y">Yueyong Lyu</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+H">Haibo Zhang</a>, <a href="/search/eess?searchtype=author&query=Navarro-Alarcon%2C+D">David Navarro-Alarcon</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.02424v1-abstract-short" style="display: inline;"> The robotic manipulation of composite rigid-deformable objects (i.e. those with mixed non-homogeneous stiffness properties) is a challenging problem with clear practical applications that, despite the recent progress in the field, has not been sufficiently studied in the literature. 
To deal with this issue, in this paper we propose a new visual servoing method that has the capability to manipul… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.02424v1-abstract-full').style.display = 'inline'; document.getElementById('2106.02424v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.02424v1-abstract-full" style="display: none;"> The robotic manipulation of composite rigid-deformable objects (i.e. those with mixed non-homogeneous stiffness properties) is a challenging problem with clear practical applications that, despite the recent progress in the field, has not been sufficiently studied in the literature. To deal with this issue, in this paper we propose a new visual servoing method that has the capability to manipulate this broad class of objects (which varies from soft to rigid) with the same adaptive strategy. To quantify the object's infinite-dimensional configuration, our new approach computes a compact feedback vector of 2D contour moments features. A sliding mode control scheme is then designed to simultaneously ensure the finite-time convergence of both the feedback shape error and the model estimation error. The stability of the proposed framework (including the boundedness of all the signals) is rigorously proved with Lyapunov theory. Detailed simulations and experiments are presented to validate the effectiveness of the proposed approach. To the best of the authors' knowledge, this is the first time that contour moments along with finite-time control have been used to solve this difficult manipulation problem. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.02424v1-abstract-full').style.display = 'none'; document.getElementById('2106.02424v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2104.00230">arXiv:2104.00230</a> <span> [<a href="https://arxiv.org/pdf/2104.00230">pdf</a>, <a href="https://arxiv.org/format/2104.00230">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Bidirectional Multiscale Feature Aggregation for Speaker Verification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Qi%2C+J">Jiajun Qi</a>, <a href="/search/eess?searchtype=author&query=Guo%2C+W">Wu Guo</a>, <a href="/search/eess?searchtype=author&query=Gu%2C+B">Bin Gu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2104.00230v1-abstract-short" style="display: inline;"> In this paper, we propose a novel bidirectional multiscale feature aggregation (BMFA) network with attentional fusion modules for text-independent speaker verification. The feature maps from different stages of the backbone network are iteratively combined and refined in both a bottom-up and top-down manner. 
Furthermore, instead of simple concatenation or element-wise addition of feature maps from… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.00230v1-abstract-full').style.display = 'inline'; document.getElementById('2104.00230v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2104.00230v1-abstract-full" style="display: none;"> In this paper, we propose a novel bidirectional multiscale feature aggregation (BMFA) network with attentional fusion modules for text-independent speaker verification. The feature maps from different stages of the backbone network are iteratively combined and refined in both a bottom-up and top-down manner. Furthermore, instead of simple concatenation or element-wise addition of feature maps from different stages, an attentional fusion module is designed to compute the fusion weights. Experiments are conducted on the NIST SRE16 and VoxCeleb1 datasets. The experimental results demonstrate the effectiveness of the bidirectional aggregation strategy and show that the proposed attentional fusion module can further improve the performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.00230v1-abstract-full').style.display = 'none'; document.getElementById('2104.00230v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2012.00803">arXiv:2012.00803</a> <span> [<a href="https://arxiv.org/pdf/2012.00803">pdf</a>, <a href="https://arxiv.org/format/2012.00803">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Generator Parameter Estimation by Q-Learning Based on PMU Measurements </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Khazeiynasab%2C+S+R">Seyyed Rashid Khazeiynasab</a>, <a href="/search/eess?searchtype=author&query=Qi%2C+J">Junjian Qi</a>, <a href="/search/eess?searchtype=author&query=Batarseh%2C+I">Issa Batarseh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2012.00803v1-abstract-short" style="display: inline;"> In this paper, a novel Q-learning based approach is proposed for estimating the parameters of synchronous generators using PMU measurements. Event playback is used to generate model outputs under different parameters for training the agent in Q-learning. We assume that the exact values of some parameters in the model are not known by the agent in Q-learning. 
Then, an optimal history-dependent poli… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2012.00803v1-abstract-full').style.display = 'inline'; document.getElementById('2012.00803v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2012.00803v1-abstract-full" style="display: none;"> In this paper, a novel Q-learning based approach is proposed for estimating the parameters of synchronous generators using PMU measurements. Event playback is used to generate model outputs under different parameters for training the agent in Q-learning. We assume that the exact values of some parameters in the model are not known by the agent in Q-learning. Then, an optimal history-dependent policy for the exploration-exploitation trade-off is planned. With given prior knowledge, the parameter vector can be viewed as states with a specific reward, which is a function of the fitting error compared with the measurements. The agent takes an action (either increasing or decreasing the parameter) and the estimated parameter will move to a new state. Based on the reward function, the optimal action policy will move the parameter set to a state with the highest reward. If multiple events are available, they will be used sequentially so that the updated $\mathbfcal{Q}$-value can be utilized to improve the computational efficiency. The effectiveness of the proposed approach is validated by estimating the parameters of the dynamic model of a synchronous generator. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2012.00803v1-abstract-full').style.display = 'none'; document.getElementById('2012.00803v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 December, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2020. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.13309">arXiv:2010.13309</a> <span> [<a href="https://arxiv.org/pdf/2010.13309">pdf</a>, <a href="https://arxiv.org/format/2010.13309">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/ICASSP39728.2021.9413453">10.1109/ICASSP39728.2021.9413453 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Decentralizing Feature Extraction with Quantum Convolutional Neural Network for Automatic Speech Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Yang%2C+C+H">Chao-Han Huck Yang</a>, <a href="/search/eess?searchtype=author&query=Qi%2C+J">Jun Qi</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+S+Y">Samuel Yen-Chi Chen</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+P">Pin-Yu Chen</a>, <a href="/search/eess?searchtype=author&query=Siniscalchi%2C+S+M">Sabato Marco Siniscalchi</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+X">Xiaoli Ma</a>, <a href="/search/eess?searchtype=author&query=Lee%2C+C">Chin-Hui Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.13309v2-abstract-short" style="display: inline;"> We propose a novel decentralized feature extraction approach in federated learning to address privacy-preservation issues for speech recognition. It is built upon a quantum convolutional neural network (QCNN) composed of a quantum circuit encoder for feature extraction, and a recurrent neural network (RNN) based end-to-end acoustic model (AM). To enhance model parameter protection in a decentraliz… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.13309v2-abstract-full').style.display = 'inline'; document.getElementById('2010.13309v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.13309v2-abstract-full" style="display: none;"> We propose a novel decentralized feature extraction approach in federated learning to address privacy-preservation issues for speech recognition. It is built upon a quantum convolutional neural network (QCNN) composed of a quantum circuit encoder for feature extraction, and a recurrent neural network (RNN) based end-to-end acoustic model (AM). 
To enhance model parameter protection in a decentralized architecture, an input speech is first up-streamed to a quantum computing server to extract Mel-spectrogram, and the corresponding convolutional features are encoded using a quantum circuit algorithm with random parameters. The encoded features are then down-streamed to the local RNN model for the final recognition. The proposed decentralized framework takes advantage of the quantum learning progress to secure models and to avoid privacy leakage attacks. Testing on the Google Speech Commands Dataset, the proposed QCNN encoder attains a competitive accuracy of 95.12% in a decentralized model, which is better than the previous architectures using centralized RNN models with convolutional features. We also conduct an in-depth study of different quantum circuit encoder architectures to provide insights into designing QCNN-based feature extractors. Neural saliency analyses demonstrate a correlation between the proposed QCNN features, class activation maps, and input spectrograms. We provide an implementation for future studies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.13309v2-abstract-full').style.display = 'none'; document.getElementById('2010.13309v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to IEEE ICASSP 2021. Code is available: https://github.com/huckiyang/QuantumSpeech-QCNN</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.10919">arXiv:2010.10919</a> <span> </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Multi-task Metric Learning for Text-independent Speaker Verification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yafeng Chen</a>, <a href="/search/eess?searchtype=author&query=Guo%2C+W">Wu Guo</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+J">Jingjing Shi</a>, <a href="/search/eess?searchtype=author&query=Qi%2C+J">Jiajun Qi</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+T">Tan Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.10919v2-abstract-short" style="display: inline;"> In this work, we introduce metric learning (ML) to enhance the deep embedding learning for text-independent speaker verification (SV). 
Specifically, the deep speaker embedding network is trained with conventional cross entropy loss and auxiliary pair-based ML loss function. For the auxiliary ML task, training samples of a mini-batch are first arranged into pairs, then positive and negative pairs a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.10919v2-abstract-full').style.display = 'inline'; document.getElementById('2010.10919v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.10919v2-abstract-full" style="display: none;"> In this work, we introduce metric learning (ML) to enhance the deep embedding learning for text-independent speaker verification (SV). Specifically, the deep speaker embedding network is trained with conventional cross entropy loss and auxiliary pair-based ML loss function. For the auxiliary ML task, training samples of a mini-batch are first arranged into pairs, then positive and negative pairs are selected and weighted through their own and relative similarities, and finally the auxiliary ML loss is calculated by the similarity of the selected pairs. To evaluate the proposed method, we conduct experiments on the Speaker in the Wild (SITW) dataset. The results demonstrate the effectiveness of the proposed method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.10919v2-abstract-full').style.display = 'none'; document.getElementById('2010.10919v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Not a particularly high-quality work, so we request withdrawal</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.07540">arXiv:2010.07540</a> <span> [<a href="https://arxiv.org/pdf/2010.07540">pdf</a>, <a href="https://arxiv.org/format/2010.07540">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Multi-Objective PMU Allocation for Resilient Power System Monitoring </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Haggi%2C+H">Hamed Haggi</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+W">Wei Sun</a>, <a href="/search/eess?searchtype=author&query=Qi%2C+J">Junjian Qi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.07540v1-abstract-short" style="display: inline;"> Phasor measurement units (PMUs) enable better system monitoring and security enhancement in smart grids. In order to enhance power system resilience against outages and blackouts caused by extreme weather events or man-made attacks, it remains a major challenge to determine the optimal number and location of PMUs. 
In this paper, a multi-objective resilient PMU placement (MORPP) problem is formulat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.07540v1-abstract-full').style.display = 'inline'; document.getElementById('2010.07540v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.07540v1-abstract-full" style="display: none;"> Phasor measurement units (PMUs) enable better system monitoring and security enhancement in smart grids. In order to enhance power system resilience against outages and blackouts caused by extreme weather events or man-made attacks, it remains a major challenge to determine the optimal number and location of PMUs. In this paper, a multi-objective resilient PMU placement (MORPP) problem is formulated, and solved by a modified Teaching-Learning-Based Optimization (MO-TLBO) algorithm. Three objectives are considered in the MORPP problem, minimizing the number of PMUs, maximizing the system observability, and minimizing the voltage stability index. The effectiveness of the proposed method is validated through testing on IEEE 14-bus, 30-bus, and 118-bus test systems. The advantage of the MO-TLBO-based MORPP is demonstrated through the comparison with other methods in the literature, in terms of iteration number, optimality and time of convergence. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.07540v1-abstract-full').style.display = 'none'; document.getElementById('2010.07540v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">IEEE PES General Meeting 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.06248">arXiv:2010.06248</a> <span> </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Exploring Universal Speech Attributes for Speaker Verification with an Improved Cross-stitch Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Qi%2C+J">Jiajun Qi</a>, <a href="/search/eess?searchtype=author&query=Guo%2C+W">Wu Guo</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+J">Jingjing Shi</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yafeng Chen</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+T">Tan Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.06248v3-abstract-short" style="display: inline;"> The universal speech attributes for x-vector based speaker verification (SV) are addressed in this paper. The manner and place of articulation form the fundamental speech attribute unit (SAU), and then new speech attribute (NSA) units for acoustic modeling are generated by tied tri-SAU states. 
An improved cross-stitch network is adopted as a multitask learning (MTL) framework for integrating these… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.06248v3-abstract-full').style.display = 'inline'; document.getElementById('2010.06248v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.06248v3-abstract-full" style="display: none;"> The universal speech attributes for x-vector based speaker verification (SV) are addressed in this paper. The manner and place of articulation form the fundamental speech attribute unit (SAU), and then new speech attribute (NSA) units for acoustic modeling are generated by tied tri-SAU states. An improved cross-stitch network is adopted as a multitask learning (MTL) framework for integrating these universal speech attributes into the x-vector network training process. Experiments are conducted on common condition 5 (CC5) of the core-core and the 10 s-10 s tests of the NIST SRE10 evaluation set, and the proposed algorithm can achieve consistent improvements over the baseline x-vector on both these tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.06248v3-abstract-full').style.display = 'none'; document.getElementById('2010.06248v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Not a particularly high-quality work, so we request withdrawal</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2009.14155">arXiv:2009.14155</a> <span> [<a href="https://arxiv.org/pdf/2009.14155">pdf</a>, <a href="https://arxiv.org/format/2009.14155">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Resilience Analysis and Cascading Failure Modeling of Power Systems under Extreme Temperatures </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Khazeiynasab%2C+S+R">Seyyed Rashid Khazeiynasab</a>, <a href="/search/eess?searchtype=author&query=Qi%2C+J">Junjian Qi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2009.14155v1-abstract-short" style="display: inline;"> In this paper, we propose an AC power flow based cascading failure model that explicitly considers external weather conditions, extreme temperatures in particular, and evaluates the impact of extreme temperature on the initiation and propagation of cascading blackouts. 
Specifically, load and dynamic line rating changes are modeled due to temperature disturbance, the probabilities for transmission… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.14155v1-abstract-full').style.display = 'inline'; document.getElementById('2009.14155v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2009.14155v1-abstract-full" style="display: none;"> In this paper, we propose an AC power flow based cascading failure model that explicitly considers external weather conditions, extreme temperatures in particular, and evaluates the impact of extreme temperature on the initiation and propagation of cascading blackouts. Specifically, load and dynamic line rating changes are modeled due to temperature disturbance, the probabilities for transmission line and generator outages are evaluated, and the timing for each type of event is carefully calculated to decide the actual event sequence. It should be emphasized that the correlated events, in the event of external temperature changes, could together contribute to voltage instability. Besides, we model undervoltage load shedding and operator re-dispatch as control strategies for preventing the propagation of cascading failures. The effectiveness of the proposed model is verified by simulation results on the RTS-96 3-area system and it is found that temperature disturbances can lead to correlated load change and line/generator tripping, which together will greatly increase the risk of cascading and voltage instability. Critical temperature change, critical area with temperature disturbance, identification of most vulnerable buses, and comparison of different control strategies are also carefully investigated. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.14155v1-abstract-full').style.display = 'none'; document.getElementById('2009.14155v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 September, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2020. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2009.01003">arXiv:2009.01003</a> <span> [<a href="https://arxiv.org/pdf/2009.01003">pdf</a>, <a href="https://arxiv.org/format/2009.01003">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Variational Inference-Based Dropout in Recurrent Neural Networks for Slot Filling in Spoken Language Understanding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Qi%2C+J">Jun Qi</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+X">Xu Liu</a>, <a href="/search/eess?searchtype=author&query=Tejedor%2C+J">Javier Tejedor</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2009.01003v1-abstract-short" style="display: inline;"> This paper proposes to generalize the variational recurrent neural network (RNN) with variational inference (VI)-based dropout regularization employed for the long short-term memory (LSTM) cells to more advanced RNN architectures like gated recurrent unit (GRU) and bi-directional LSTM/GRU. The new variational RNNs are employed for slot filling, which is an intriguing but challenging task in spoken… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.01003v1-abstract-full').style.display = 'inline'; document.getElementById('2009.01003v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2009.01003v1-abstract-full" style="display: none;"> This paper proposes to generalize the variational recurrent neural network (RNN) with variational inference (VI)-based dropout regularization employed for the long short-term memory (LSTM) cells to more advanced RNN architectures like gated recurrent unit (GRU) and bi-directional LSTM/GRU. The new variational RNNs are employed for slot filling, which is an intriguing but challenging task in spoken language understanding. The experiments on the ATIS dataset suggest that the variational RNNs with the VI-based dropout regularization can significantly improve the naive dropout regularization RNNs-based baseline systems in terms of F-measure. Particularly, the variational RNN with bi-directional LSTM/GRU obtains the best F-measure score. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.01003v1-abstract-full').style.display = 'none'; document.getElementById('2009.01003v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 August, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2020. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">conference paper, 5 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2008.07281">arXiv:2008.07281</a> <span> [<a href="https://arxiv.org/pdf/2008.07281">pdf</a>, <a href="https://arxiv.org/ps/2008.07281">ps</a>, <a href="https://arxiv.org/format/2008.07281">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/LSP.2020.3016837">10.1109/LSP.2020.3016837 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> On Mean Absolute Error for Deep Neural Network Based Vector-to-Vector Regression </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Qi%2C+J">Jun Qi</a>, <a href="/search/eess?searchtype=author&query=Du%2C+J">Jun Du</a>, <a href="/search/eess?searchtype=author&query=Siniscalchi%2C+S+M">Sabato Marco Siniscalchi</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+X">Xiaoli Ma</a>, <a href="/search/eess?searchtype=author&query=Lee%2C+C">Chin-Hui Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2008.07281v1-abstract-short" style="display: inline;"> In this paper, we exploit the properties of mean absolute error (MAE) as a loss function for the deep neural network (DNN) based vector-to-vector regression. The goal of this work is two-fold: (i) presenting performance bounds of MAE, and (ii) demonstrating new properties of MAE that make it more appropriate than mean squared error (MSE) as a loss function for DNN based vector-to-vector regression… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2008.07281v1-abstract-full').style.display = 'inline'; document.getElementById('2008.07281v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2008.07281v1-abstract-full" style="display: none;"> In this paper, we exploit the properties of mean absolute error (MAE) as a loss function for the deep neural network (DNN) based vector-to-vector regression. The goal of this work is two-fold: (i) presenting performance bounds of MAE, and (ii) demonstrating new properties of MAE that make it more appropriate than mean squared error (MSE) as a loss function for DNN based vector-to-vector regression. First, we show that a generalized upper-bound for DNN-based vector- to-vector regression can be ensured by leveraging the known Lipschitz continuity property of MAE. 
arXiv:2008.06896 [pdf, other]  cs.RO eess.SY
Adaptive Shape Servoing of Elastic Rods using Parameterized Regression Features and Auto-Tuning Motion Controls
Authors: Jiaming Qi, Guangtao Ran, Bohui Wang, Jian Liu, Wanyu Ma, Peng Zhou, David Navarro-Alarcon
Abstract: The robotic manipulation of deformable linear objects has shown great potential in a wide range of real-world applications. However, it presents many challenges due to the objects' complex nonlinearity and high-dimensional configuration. In this paper, we propose a new shape servoing framework to automatically manipulate elastic rods through visual feedback. Our new method uses parameterized regression features to compute a compact (low-dimensional) feature vector that quantifies the object's shape, thus enabling an explicit shape servo-loop to be established. To automatically deform the rod into a desired shape, the proposed adaptive controller iteratively estimates the differential transformation between the robot's motion and the relative shape changes; this valuable capability allows objects with unknown mechanical models to be manipulated effectively. An auto-tuning algorithm is introduced to adjust the robot's shaping motions in real time based on optimal performance criteria. To validate the proposed framework, a detailed experimental study with vision-guided robotic manipulators is presented.
Submitted 9 September, 2023; v1 submitted 16 August, 2020; originally announced August 2020.
Comments: 8 pages, 12 figures
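The loop described in the abstract, iteratively estimating the differential transformation between robot motion and shape-feature changes and then servoing through it, can be sketched generically with a recursive-least-squares Jacobian estimate. The snippet below only illustrates that broad idea; it is not the paper's estimator, controller, or auto-tuning rule, and the gain, forgetting factor, and function names are assumptions.

```python
# Generic online-Jacobian shape-servo loop, sketched with recursive least squares.
# Illustrative only: not the paper's estimator or controller.
import numpy as np

def rls_update(J, P, dr, ds, lam=0.98):
    """One recursive-least-squares update of J so that ds ~= J @ dr."""
    dr = dr.reshape(-1, 1)                      # robot motion increment
    ds = ds.reshape(-1, 1)                      # observed shape-feature increment
    K = P @ dr / (lam + dr.T @ P @ dr)          # gain vector
    J = J + (ds - J @ dr) @ K.T                 # correct the prediction error
    P = (P - K @ dr.T @ P) / lam                # covariance update with forgetting
    return J, P

def servo_step(J, s, s_target, gain=0.5):
    """Command a motion that moves the features toward the target through J."""
    return -gain * np.linalg.pinv(J) @ (s - s_target)
```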
arXiv:2008.05459 [pdf, other]  cs.LG eess.SP stat.ML  doi: 10.1109/TSP.2020.2993164
Analyzing Upper Bounds on Mean Absolute Errors for Deep Neural Network Based Vector-to-Vector Regression
Authors: Jun Qi, Jun Du, Sabato Marco Siniscalchi, Xiaoli Ma, Chin-Hui Lee
Abstract: In this paper, we show that, in vector-to-vector regression utilizing deep neural networks (DNNs), a generalized loss of mean absolute error (MAE) between the predicted and expected feature vectors is upper bounded by the sum of an approximation error, an estimation error, and an optimization error. Leveraging error decomposition techniques in statistical learning theory and non-convex optimization theory, we derive upper bounds for each of the three aforementioned errors and impose necessary constraints on DNN models. Moreover, we assess our theoretical results through a set of image de-noising and speech enhancement experiments. Our proposed upper bounds of MAE for DNN based vector-to-vector regression are corroborated by the experimental results, and the upper bounds are valid with and without the "over-parametrization" technique.
Submitted 4 August, 2020; originally announced August 2020.
Journal ref: IEEE Transactions on Signal Processing, Vol. 68, pp. 3411-3422, 2020
arXiv:2007.13024 [pdf, other]  eess.AS cs.CL cs.LG cs.NE cs.SD
Exploring Deep Hybrid Tensor-to-Vector Network Architectures for Regression Based Speech Enhancement
Authors: Jun Qi, Hu Hu, Yannan Wang, Chao-Han Huck Yang, Sabato Marco Siniscalchi, Chin-Hui Lee
Abstract: This paper investigates different trade-offs between the number of model parameters and enhanced speech quality by employing several deep tensor-to-vector regression models for speech enhancement. We find that a hybrid architecture, namely CNN-TT, is capable of maintaining good quality performance with a reduced model parameter size. CNN-TT is composed of several convolutional layers at the bottom for feature extraction, to improve speech quality, and a tensor-train (TT) output layer on top, to reduce model parameters. We first derive a new upper bound on the generalization power of convolutional neural network (CNN) based vector-to-vector regression models. Then, we provide experimental evidence on the Edinburgh noisy speech corpus to demonstrate that, in single-channel speech enhancement, CNN outperforms DNN at the expense of a small increase in model size. Moreover, CNN-TT slightly outperforms its CNN counterpart while using only 32% of the CNN model parameters. Further performance improvement can be attained if the number of CNN-TT parameters is increased to 44% of the CNN model size. Finally, our multi-channel speech enhancement experiments on a simulated noisy WSJ0 corpus demonstrate that the proposed hybrid CNN-TT architecture achieves better results than both DNN and CNN models, in terms of both better enhanced speech quality and smaller parameter sizes.
Submitted 2 August, 2020; v1 submitted 25 July, 2020; originally announced July 2020.
Comments: Accepted to InterSpeech 2020
arXiv:2004.12097 [pdf, other]  cs.RO eess.SY
A Lyapunov-Stable Adaptive Method to Approximate Sensorimotor Models for Sensor-Based Control
Authors: David Navarro-Alarcon, Jiaming Qi, Jihong Zhu, Andrea Cherubini
Abstract: In this article, we present a new scheme that approximates unknown sensorimotor models of robots by using feedback signals only. The uncalibrated sensor-based regulation problem is first formulated; then, we develop a computational method that distributes the model estimation problem amongst multiple adaptive units that specialise in a local sensorimotor map. Different from traditional estimation algorithms, the proposed method requires little data to train and constrain it (the number of required data points can be analytically determined) and has rigorous stability properties (the conditions to satisfy Lyapunov stability are derived). Numerical simulations and experimental results are presented to validate the proposed method.
Submitted 4 July, 2020; v1 submitted 25 April, 2020; originally announced April 2020.
Comments: 19 pages, 15 figures
arXiv:2003.13917 [pdf, other]  eess.AS cs.CL cs.CR cs.LG cs.SD  doi: 10.1109/ICASSP40776.2020.9053288
Characterizing Speech Adversarial Examples Using Self-Attention U-Net Enhancement
Authors: Chao-Han Huck Yang, Jun Qi, Pin-Yu Chen, Xiaoli Ma, Chin-Hui Lee
Abstract: Recent studies have highlighted adversarial examples as ubiquitous threats to deep neural network (DNN) based speech recognition systems. In this work, we present a U-Net based attention model, U-Net$_{At}$, to enhance adversarial speech signals. Specifically, we evaluate the model performance with interpretable speech recognition metrics and discuss the model performance under augmented adversarial training. Our experiments show that the proposed U-Net$_{At}$ improves the perceptual evaluation of speech quality (PESQ) from 1.13 to 2.78, the speech transmission index (STI) from 0.65 to 0.75, and the short-term objective intelligibility (STOI) from 0.83 to 0.96 on the task of speech enhancement with adversarial speech examples. We also conduct experiments on the automatic speech recognition (ASR) task under adversarial audio attacks. We find that (i) temporal features learned by the attention network are capable of enhancing the robustness of DNN based ASR models, and (ii) the generalization power of DNN based ASR models could be enhanced by applying adversarial training with additive adversarial data augmentation. The ASR word-error-rate (WER) metric shows an absolute 2.22% decrease under gradient-based perturbation and an absolute 2.03% decrease under evolutionary-optimized perturbation, which suggests that our enhancement models with adversarial training can further secure a resilient ASR system.
Submitted 31 December, 2021; v1 submitted 30 March, 2020; originally announced March 2020.
Comments: The authors have revised some annotations in Table 4 to improve the clarity. The authors thank Jonathan Le Roux for reading feedback. The first draft was finished in August 2019. Accepted to IEEE ICASSP 2020
Journal ref: 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)
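One simple way to produce the kind of additive adversarial audio this line of work defends against is a fast-gradient-sign (FGSM) style perturbation of the waveform, computed from the gradient of a model's loss. The sketch below is a generic attack stub for context, not the attack or the U-Net$_{At}$ defense studied in the paper; the model, loss function, and epsilon are placeholders.

```python
# Generic FGSM-style additive perturbation on a waveform (PyTorch). The attacked
# model, the loss, and eps are placeholders; this is not the paper's setup.
import torch

def fgsm_audio(model, loss_fn, waveform, target, eps=2e-3):
    """Return waveform + eps * sign(gradient of the loss w.r.t. the waveform)."""
    x = waveform.clone().detach().requires_grad_(True)
    loss_fn(model(x), target).backward()
    return (x + eps * x.grad.sign()).detach()
```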
arXiv:2002.09027 [pdf, other]  cs.LG eess.SP stat.ML  doi: 10.1109/ICASSP40776.2020.9053342
Enhanced Adversarial Strategically-Timed Attacks against Deep Reinforcement Learning
Authors: Chao-Han Huck Yang, Jun Qi, Pin-Yu Chen, Yi Ouyang, I-Te Danny Hung, Chin-Hui Lee, Xiaoli Ma
Abstract: Recent deep neural network based techniques, especially those able to self-adapt at the system level, such as deep reinforcement learning (DRL), have shown many advantages for optimizing robot learning systems (e.g., autonomous navigation and continuous robot arm control). However, such learning-based systems and their associated models may be threatened by the risks of intentionally adaptive (e.g., noisy sensor confusion) and adversarial perturbations from real-world scenarios. In this paper, we introduce timing-based adversarial strategies against a DRL-based navigation system by jamming physical noise patterns into selected time frames. To study the vulnerability of learning-based navigation systems, we propose two adversarial agent models: one based on online learning and another based on evolutionary learning. Furthermore, three open-source robot learning and navigation control environments are employed to study the vulnerability under adversarial timing attacks. Our experimental results show that the adversarial timing attacks can lead to a significant performance drop, and also suggest the need to enhance the robustness of robot learning systems.
Submitted 20 February, 2020; originally announced February 2020.
Comments: Accepted to IEEE ICASSP 2020
Journal ref: 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)
arXiv:2002.00544 [pdf, other]  eess.AS cs.CL cs.LG cs.NE cs.SD
Tensor-to-Vector Regression for Multi-channel Speech Enhancement based on Tensor-Train Network
Authors: Jun Qi, Hu Hu, Yannan Wang, Chao-Han Huck Yang, Sabato Marco Siniscalchi, Chin-Hui Lee
Abstract: We propose a tensor-to-vector regression approach to multi-channel speech enhancement in order to address the issue of input size explosion and hidden-layer size expansion. The key idea is to cast the conventional deep neural network (DNN) based vector-to-vector regression formulation under a tensor-train network (TTN) framework. TTN is a recently emerged solution for the compact representation of deep models with fully connected hidden layers. Thus, TTN maintains DNN's expressive power yet involves a much smaller number of trainable parameters. Furthermore, TTN can handle a multi-dimensional tensor input by design, which exactly matches the desired setting in multi-channel speech enhancement. We first provide a theoretical extension from DNN to TTN based regression. Next, we show that TTN can attain speech enhancement quality comparable with that of DNN but with far fewer parameters; e.g., a reduction from 27 million to only 5 million parameters is observed in a single-channel scenario. TTN also improves PESQ over DNN from 2.86 to 2.96 by slightly increasing the number of trainable parameters. Finally, in 8-channel conditions, a PESQ of 3.12 is achieved using 20 million parameters for TTN, whereas a DNN with 68 million parameters can only attain a PESQ of 3.06. Our implementation is available online: https://github.com/uwjunqi/Tensor-Train-Neural-Network.
Submitted 2 February, 2020; originally announced February 2020.
Comments: Accepted to ICASSP 2020. Updated reproducible code
Journal ref: IEEE ICASSP 2020
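Where the parameter savings in a tensor-train (TT) factorized layer come from can be shown with a back-of-the-envelope count: a dense weight matrix costs the product of its full input and output sizes, while a TT factorization costs only the sum of its small cores. The modes and ranks below are made-up examples, not the configuration used in the paper.

```python
# Parameter count of a dense weight matrix versus a tensor-train factorization.
# The factorization shapes and ranks are illustrative, not the paper's setup.
import numpy as np

def dense_params(in_modes, out_modes):
    return int(np.prod(in_modes) * np.prod(out_modes))

def tt_params(in_modes, out_modes, ranks):
    # ranks has len(in_modes) + 1 entries, with ranks[0] = ranks[-1] = 1.
    return int(sum(ranks[k] * in_modes[k] * out_modes[k] * ranks[k + 1]
                   for k in range(len(in_modes))))

in_modes, out_modes = (8, 8, 8, 8), (4, 4, 4, 4)   # a 4096 -> 256 layer
ranks = (1, 8, 8, 8, 1)

print(dense_params(in_modes, out_modes))           # 1048576 weights
print(tt_params(in_modes, out_modes, ranks))       # 4608 weights
```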
arXiv:2001.10529 [pdf]  eess.AS cs.LG cs.NE cs.SD  doi: 10.1109/ICASSP40776.2020.9054219
Submodular Rank Aggregation on Score-based Permutations for Distributed Automatic Speech Recognition
Authors: Jun Qi, Chao-Han Huck Yang, Javier Tejedor
Abstract: Distributed automatic speech recognition (ASR) requires aggregating the outputs of distributed deep neural network (DNN)-based models. This work studies the use of submodular functions to design a rank aggregation on score-based permutations, which can be used for distributed ASR systems in both supervised and unsupervised modes. Specifically, we compose an aggregation rank function based on the Lovasz Bregman divergence for setting up linear structured convex and nested structured concave functions. The algorithm is based on stochastic gradient descent (SGD) and can obtain well-trained aggregation models. Our experiments on the distributed ASR system show that the submodular rank aggregation can obtain higher speech recognition accuracy than traditional aggregation methods like Adaboost. Code is available online: https://github.com/uwjunqi/Subrank.
Submitted 27 January, 2020; originally announced January 2020.
Comments: Accepted to ICASSP 2020. Please download the pdf to view Figure 1. arXiv admin note: substantial text overlap with arXiv:1707.01166
Journal ref: 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)
<li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>