Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script 
src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." 
aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 106 results for author: <span class="mathjax">Feng, S</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/eess" role="search"> Searching in archive <strong>eess</strong>. 
<a href="/search/?searchtype=author&query=Feng%2C+S">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Feng, S"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Feng%2C+S&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level 
breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Feng, S"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Feng%2C+S&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Feng%2C+S&start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Feng%2C+S&start=50" class="pagination-link " aria-label="Goto page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Feng%2C+S&start=100" class="pagination-link " aria-label="Goto page 3">3 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.08329">arXiv:2410.08329</a> <span> [<a href="https://arxiv.org/pdf/2410.08329">pdf</a>, <a href="https://arxiv.org/format/2410.08329">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Physics and Deep Learning in 
Computational Wave Imaging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Lin%2C+Y">Youzuo Lin</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shihang Feng</a>, <a href="/search/eess?searchtype=author&query=Theiler%2C+J">James Theiler</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yinpeng Chen</a>, <a href="/search/eess?searchtype=author&query=Villa%2C+U">Umberto Villa</a>, <a href="/search/eess?searchtype=author&query=Rao%2C+J">Jing Rao</a>, <a href="/search/eess?searchtype=author&query=Greenhall%2C+J">John Greenhall</a>, <a href="/search/eess?searchtype=author&query=Pantea%2C+C">Cristian Pantea</a>, <a href="/search/eess?searchtype=author&query=Anastasio%2C+M+A">Mark A. Anastasio</a>, <a href="/search/eess?searchtype=author&query=Wohlberg%2C+B">Brendt Wohlberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.08329v1-abstract-short" style="display: inline;"> Computational wave imaging (CWI) extracts hidden structure and physical properties of a volume of material by analyzing wave signals that traverse that volume. Applications include seismic exploration of the Earth's subsurface, acoustic imaging and non-destructive testing in material science, and ultrasound computed tomography in medicine. 
Current approaches for solving CWI problems can be divided… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08329v1-abstract-full').style.display = 'inline'; document.getElementById('2410.08329v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.08329v1-abstract-full" style="display: none;"> Computational wave imaging (CWI) extracts hidden structure and physical properties of a volume of material by analyzing wave signals that traverse that volume. Applications include seismic exploration of the Earth's subsurface, acoustic imaging and non-destructive testing in material science, and ultrasound computed tomography in medicine. Current approaches for solving CWI problems can be divided into two categories: those rooted in traditional physics, and those based on deep learning. Physics-based methods stand out for their ability to provide high-resolution and quantitatively accurate estimates of acoustic properties within the medium. However, they can be computationally intensive and are susceptible to ill-posedness and nonconvexity typical of CWI problems. Machine learning-based computational methods have recently emerged, offering a different perspective to address these challenges. Diverse scientific communities have independently pursued the integration of deep learning in CWI. This review delves into how contemporary scientific machine-learning (ML) techniques, and deep neural networks in particular, have been harnessed to tackle CWI problems. We present a structured framework that consolidates existing research spanning multiple domains, including computational imaging, wave physics, and data science. This study concludes with important lessons learned from existing ML-based methods and identifies technical hurdles and emerging trends through a systematic analysis of the extensive literature on this topic. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08329v1-abstract-full').style.display = 'none'; document.getElementById('2410.08329v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">29 pages, 11 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.18787">arXiv:2409.18787</a> <span> [<a href="https://arxiv.org/pdf/2409.18787">pdf</a>, <a href="https://arxiv.org/format/2409.18787">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Asymptotic tracking control of dynamic reference over homomorphically encrypted data with finite modulus </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shuai Feng</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+J">Junsoo Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.18787v1-abstract-short" style="display: inline;"> This paper considers a tracking control problem, in which the dynamic controller is encrypted with an additively homomorphic encryption scheme and the output of a process tracks a dynamic reference asymptotically. 
Our paper is motivated by the following problem: When dealing with both asymptotic tracking and dynamic reference, we find that the control input is generally subject to overflow issues… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.18787v1-abstract-full').style.display = 'inline'; document.getElementById('2409.18787v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.18787v1-abstract-full" style="display: none;"> This paper considers a tracking control problem, in which the dynamic controller is encrypted with an additively homomorphic encryption scheme and the output of a process tracks a dynamic reference asymptotically. Our paper is motivated by the following problem: When dealing with both asymptotic tracking and dynamic reference, we find that the control input is generally subject to overflow issues under a finite modulus, though the dynamic controller consists of only integer coefficients. First, we provide a new controller design method such that the coefficients of the tracking controller can be transformed into integers leveraging the zooming-in factor of dynamic quantization. By the Cayley-Hamilton theorem, we represent the control input as linear combination of the previous control inputs. Leveraging the property above, we design an algorithm on the actuator side such that it can restore the control input from the lower bits under a finite modulus. A lower bound of the modulus is also provided. As an extension of the first result, we further solve the problem of unbounded internal state taking place in the actuator. In particular, the actuator can restore the correct control input under the same modulus. A simulation example is provided to verify the control schemes proposed in our paper. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.18787v1-abstract-full').style.display = 'none'; document.getElementById('2409.18787v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.15708">arXiv:2409.15708</a> <span> [<a href="https://arxiv.org/pdf/2409.15708">pdf</a>, <a href="https://arxiv.org/format/2409.15708">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Open-/Closed-loop Active Learning for Data-driven Predictive Control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shilun Feng</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+D">Dawei Shi</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+Y">Yang Shi</a>, <a href="/search/eess?searchtype=author&query=Zheng%2C+K">Kaikai Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.15708v1-abstract-short" style="display: inline;"> An important question in data-driven control is how to obtain an informative dataset. In this work, we consider the problem of effective data acquisition of an unknown linear system with bounded disturbance for both open-loop and closed-loop stages. The learning objective is to minimize the volume of the set of admissible systems. 
First, a performance measure based on historical data and the input… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15708v1-abstract-full').style.display = 'inline'; document.getElementById('2409.15708v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.15708v1-abstract-full" style="display: none;"> An important question in data-driven control is how to obtain an informative dataset. In this work, we consider the problem of effective data acquisition of an unknown linear system with bounded disturbance for both open-loop and closed-loop stages. The learning objective is to minimize the volume of the set of admissible systems. First, a performance measure based on historical data and the input sequence is introduced to characterize the upper bound of the volume of the set of admissible systems. On the basis of this performance measure, an open-loop active learning strategy is proposed to minimize the volume by actively designing inputs during the open-loop stage. For the closed-loop stage, an closed-loop active learning strategy is designed to select and learn from informative closed-loop data. The efficiency of the proposed closed-loop active learning strategy is proved by showing that the unselected data cannot benefit the learning performance. Furthermore, an adaptive predictive controller is designed in accordance with the proposed data acquisition approach. The recursive feasibility and the stability of the controller are proved by analyzing the effect of the closed-loop active learning strategy. Finally, numerical examples and comparisons illustrate the effectiveness of the proposed data acquisition strategy. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15708v1-abstract-full').style.display = 'none'; document.getElementById('2409.15708v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.14369">arXiv:2409.14369</a> <span> [<a href="https://arxiv.org/pdf/2409.14369">pdf</a>, <a href="https://arxiv.org/format/2409.14369">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Few-Shot Testing of Autonomous Vehicles with Scenario Similarity Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+S">Shu Li</a>, <a href="/search/eess?searchtype=author&query=He%2C+H">Honglin He</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+J">Jingxuan Yang</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+J">Jianming Hu</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Y">Yi Zhang</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shuo Feng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.14369v1-abstract-short" style="display: inline;"> Testing and evaluation are critical to the development and deployment of autonomous vehicles (AVs). Given the rarity of safety-critical events such as crashes, millions of tests are typically needed to accurately assess AV safety performance. 
Although techniques like importance sampling can accelerate this process, it usually still requires too many numbers of tests for field testing. This severel… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14369v1-abstract-full').style.display = 'inline'; document.getElementById('2409.14369v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.14369v1-abstract-full" style="display: none;"> Testing and evaluation are critical to the development and deployment of autonomous vehicles (AVs). Given the rarity of safety-critical events such as crashes, millions of tests are typically needed to accurately assess AV safety performance. Although techniques like importance sampling can accelerate this process, it usually still requires too many numbers of tests for field testing. This severely hinders the testing and evaluation process, especially for third-party testers and governmental bodies with very limited testing budgets. The rapid development cycles of AV technology further exacerbate this challenge. To fill this research gap, this paper introduces the few-shot testing (FST) problem and proposes a methodological framework to tackle it. As the testing budget is very limited, usually smaller than 100, the FST method transforms the testing scenario generation problem from probabilistic sampling to deterministic optimization, reducing the uncertainty of testing results. To optimize the selection of testing scenarios, a cross-attention similarity mechanism is proposed to learn to extract the information of AV's testing scenario space. This allows iterative searches for scenarios with the smallest evaluation error, ensuring precise testing within budget constraints. Experimental results in cut-in scenarios demonstrate the effectiveness of the FST method, significantly enhancing accuracy and enabling efficient, precise AV testing. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14369v1-abstract-full').style.display = 'none'; document.getElementById('2409.14369v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.11711">arXiv:2409.11711</a> <span> [<a href="https://arxiv.org/pdf/2409.11711">pdf</a>, <a href="https://arxiv.org/format/2409.11711">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> LFIC-DRASC: Deep Light Field Image Compression Using Disentangled Representation and Asymmetrical Strip Convolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shiyu Feng</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Y">Yun Zhang</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+L">Linwei Zhu</a>, <a href="/search/eess?searchtype=author&query=Kwong%2C+S">Sam Kwong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.11711v1-abstract-short" style="display: inline;"> Light-Field (LF) image is emerging 4D data of 
light rays that is capable of realistically presenting spatial and angular information of 3D scene. However, the large data volume of LF images becomes the most challenging issue in real-time processing, transmission, and storage. In this paper, we propose an end-to-end deep LF Image Compression method Using Disentangled Representation and Asymmetrical… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11711v1-abstract-full').style.display = 'inline'; document.getElementById('2409.11711v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.11711v1-abstract-full" style="display: none;"> Light-Field (LF) image is emerging 4D data of light rays that is capable of realistically presenting spatial and angular information of 3D scene. However, the large data volume of LF images becomes the most challenging issue in real-time processing, transmission, and storage. In this paper, we propose an end-to-end deep LF Image Compression method Using Disentangled Representation and Asymmetrical Strip Convolution (LFIC-DRASC) to improve coding efficiency. Firstly, we formulate the LF image compression problem as learning a disentangled LF representation network and an image encoding-decoding network. Secondly, we propose two novel feature extractors that leverage the structural prior of LF data by integrating features across different dimensions. Meanwhile, disentangled LF representation network is proposed to enhance the LF feature disentangling and decoupling. Thirdly, we propose the LFIC-DRASC for LF image compression, where two Asymmetrical Strip Convolution (ASC) operators, i.e. horizontal and vertical, are proposed to capture long-range correlation in LF feature space. These two ASC operators can be combined with the square convolution to further decouple LF features, which enhances the model ability in representing intricate spatial relationships. 
Experimental results demonstrate that the proposed LFIC-DRASC achieves an average of 20.5\% bit rate reductions comparing with the state-of-the-art methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11711v1-abstract-full').style.display = 'none'; document.getElementById('2409.11711v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.20622">arXiv:2407.20622</a> <span> [<a href="https://arxiv.org/pdf/2407.20622">pdf</a>, <a href="https://arxiv.org/format/2407.20622">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Decoding Linguistic Representations of Human Brain </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yu Wang</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+H">Heyang Liu</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yuhao Wang</a>, <a href="/search/eess?searchtype=author&query=Xuan%2C+C">Chuan Xuan</a>, <a href="/search/eess?searchtype=author&query=Hou%2C+Y">Yixuan Hou</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Sheng Feng</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+H">Hongcheng Liu</a>, <a 
href="/search/eess?searchtype=author&query=Liao%2C+Y">Yusheng Liao</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yanfeng Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.20622v1-abstract-short" style="display: inline;"> Language, as an information medium created by advanced organisms, has always been a concern of neuroscience regarding how it is represented in the brain. Decoding linguistic representations in the evoked brain has shown groundbreaking achievements, thanks to the rapid improvement of neuroimaging, medical technology, life sciences and artificial intelligence. In this work, we present a taxonomy of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.20622v1-abstract-full').style.display = 'inline'; document.getElementById('2407.20622v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.20622v1-abstract-full" style="display: none;"> Language, as an information medium created by advanced organisms, has always been a concern of neuroscience regarding how it is represented in the brain. Decoding linguistic representations in the evoked brain has shown groundbreaking achievements, thanks to the rapid improvement of neuroimaging, medical technology, life sciences and artificial intelligence. In this work, we present a taxonomy of brain-to-language decoding of both textual and speech formats. This work integrates two types of research: neuroscience focusing on language understanding and deep learning-based brain decoding. Generating discernible language information from brain activity could not only help those with limited articulation, especially amyotrophic lateral sclerosis (ALS) patients but also open up a new way for the next generation's brain-computer interface (BCI). 
This article will help brain scientists and deep-learning researchers to gain a bird's eye view of fine-grained language perception, and thus facilitate their further investigation and research of neural process and language decoding. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.20622v1-abstract-full').style.display = 'none'; document.getElementById('2407.20622v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.16588">arXiv:2406.16588</a> <span> [<a href="https://arxiv.org/pdf/2406.16588">pdf</a>, <a href="https://arxiv.org/format/2406.16588">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Formal Languages and Automata Theory">cs.FL</span> </div> </div> <p class="title is-5 mathjax"> Switching Controller Synthesis for Hybrid Systems Against STL Formulas </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Su%2C+H">Han Su</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shenghua Feng</a>, <a href="/search/eess?searchtype=author&query=Zhan%2C+S">Sinong Zhan</a>, <a href="/search/eess?searchtype=author&query=Zhan%2C+N">Naijun Zhan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.16588v1-abstract-short" style="display: inline;"> Switching controllers play a pivotal role in directing 
hybrid systems (HSs) towards the desired objective, embodying a ``correct-by-construction'' approach to HS design. Identifying these objectives is thus crucial for the synthesis of effective switching controllers. While most existing works focus on safety and liveness, few of them consider timing constraints. In this paper, we delve into t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.16588v1-abstract-full').style.display = 'inline'; document.getElementById('2406.16588v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.16588v1-abstract-full" style="display: none;"> Switching controllers play a pivotal role in directing hybrid systems (HSs) towards the desired objective, embodying a ``correct-by-construction'' approach to HS design. Identifying these objectives is thus crucial for the synthesis of effective switching controllers. While most existing works focus on safety and liveness, few of them consider timing constraints. In this paper, we delve into the synthesis of switching controllers for HSs that meet system objectives given by a fragment of STL, which essentially corresponds to a reach-avoid problem with timing constraints. Our approach involves iteratively computing the state sets that can be driven to satisfy the reach-avoid specification with timing constraints. This technique supports the creation of switching controllers for both constant and non-constant HSs. We validate our method's soundness, and confirm its relative completeness for a certain subclass of HSs. Experimental results affirm the efficacy of our approach. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.16588v1-abstract-full').style.display = 'none'; document.getElementById('2406.16588v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.11568">arXiv:2406.11568</a> <span> [<a href="https://arxiv.org/pdf/2406.11568">pdf</a>, <a href="https://arxiv.org/format/2406.11568">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.21437/Interspeech.2024-382">10.21437/Interspeech.2024-382 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Towards an End-to-End Framework for Invasive Brain Signal Decoding with Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Feng%2C+S">Sheng Feng</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+H">Heyang Liu</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yu Wang</a>, <a 
href="/search/eess?searchtype=author&query=Wang%2C+Y">Yanfeng Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.11568v1-abstract-short" style="display: inline;"> In this paper, we introduce a groundbreaking end-to-end (E2E) framework for decoding invasive brain signals, marking a significant advancement in the field of speech neuroprosthesis. Our methodology leverages the comprehensive reasoning abilities of large language models (LLMs) to facilitate direct decoding. By fully integrating LLMs, we achieve results comparable to the state-of-the-art cascade m… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.11568v1-abstract-full').style.display = 'inline'; document.getElementById('2406.11568v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.11568v1-abstract-full" style="display: none;"> In this paper, we introduce a groundbreaking end-to-end (E2E) framework for decoding invasive brain signals, marking a significant advancement in the field of speech neuroprosthesis. Our methodology leverages the comprehensive reasoning abilities of large language models (LLMs) to facilitate direct decoding. By fully integrating LLMs, we achieve results comparable to the state-of-the-art cascade models. Our findings underscore the immense potential of E2E frameworks in speech neuroprosthesis, particularly as the technology behind brain-computer interfaces (BCIs) and the availability of relevant datasets continue to evolve. This work not only showcases the efficacy of combining LLMs with E2E decoding for enhancing speech neuroprosthesis but also sets a new direction for future research in BCI applications, underscoring the impact of LLMs in decoding complex neural signals for communication restoration. 
Code will be made available at https://github.com/FsFrancis15/BrainLLM. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.11568v1-abstract-full').style.display = 'none'; document.getElementById('2406.11568v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Proceedings of Interspeech2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.08306">arXiv:2405.08306</a> <span> [<a href="https://arxiv.org/pdf/2405.08306">pdf</a>, <a href="https://arxiv.org/format/2405.08306">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Flight Path Optimization with Optimal Control Method </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Su%2C+G">Gaofeng Su</a>, <a href="/search/eess?searchtype=author&query=Cheng%2C+X">Xi Cheng</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Siyuan Feng</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+K">Ke Liu</a>, <a href="/search/eess?searchtype=author&query=Song%2C+J">Jilin Song</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+J">Jianan Chen</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+C">Chen Zhu</a>, <a href="/search/eess?searchtype=author&query=Lin%2C+H">Hui Lin</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.08306v2-abstract-short" style="display: inline;"> This paper is based on a crucial issue in the aviation world: how to optimize the trajectory and controls given to the aircraft in order to optimize flight time and fuel consumption. This study aims to provide elements of a response to this problem and to define, under certain simplifying assumptions, an optimal response, using Constrained Finite Time Optimal Control (CFTOC). The first step is to d… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.08306v2-abstract-full').style.display = 'inline'; document.getElementById('2405.08306v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.08306v2-abstract-full" style="display: none;"> This paper is based on a crucial issue in the aviation world: how to optimize the trajectory and controls given to the aircraft in order to optimize flight time and fuel consumption. This study aims to provide elements of a response to this problem and to define, under certain simplifying assumptions, an optimal response, using Constrained Finite Time Optimal Control (CFTOC). The first step is to define the dynamic model of the aircraft in accordance with the controllable inputs and wind disturbances. Then we will identify a precise objective in terms of optimization and implement an optimization program to solve it under simulated real-flight conditions. Finally, the optimization result is validated and discussed across different scenarios. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.08306v2-abstract-full').style.display = 'none'; document.getElementById('2405.08306v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.06230">arXiv:2405.06230</a> <span> [<a href="https://arxiv.org/pdf/2405.06230">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Fire in SRRN: Next-Gen 3D Temperature Field Reconstruction Technology </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shenxiang Feng</a>, <a href="/search/eess?searchtype=author&query=Hao%2C+X">Xiaojian Hao</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+X">Xiaodong Huang</a>, <a href="/search/eess?searchtype=author&query=Pei%2C+P">Pan Pei</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+T">Tong Wei</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+C">Chenyang Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.06230v1-abstract-short" style="display: inline;"> In aerospace and energy engineering, accurate 3D combustion field temperature measurement is critical. The resolution of traditional methods based on algebraic iteration is limited by the initial voxel division. 
This study introduces a novel method for reconstructing three-dimensional temperature fields using the Spatial Radiation Representation Network (SRRN). This method utilizes the flame therm… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.06230v1-abstract-full').style.display = 'inline'; document.getElementById('2405.06230v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.06230v1-abstract-full" style="display: none;"> In aerospace and energy engineering, accurate 3D combustion field temperature measurement is critical. The resolution of traditional methods based on algebraic iteration is limited by the initial voxel division. This study introduces a novel method for reconstructing three-dimensional temperature fields using the Spatial Radiation Representation Network (SRRN). This method utilizes the flame thermal radiation characteristics and differentiable rendering in graphics, and combines it with a multi-layer perceptron to achieve a functional representation of the flame temperature field. The effectiveness of SRRN is evaluated through simulated temperature field reconstruction experiments with different levels of complexity. The maximum root mean square error is 10.17, which proves the robustness of the algorithm to Gaussian noise and salt-and-pepper noise. We conducted a butane flame temperature field reconstruction experiment, and the maximum relative error between the reconstruction result and the thermocouple measurement value was 4.86%, confirming that the algorithm can achieve accurate reconstruction. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.06230v1-abstract-full').style.display = 'none'; document.getElementById('2405.06230v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.13748">arXiv:2404.13748</a> <span> [<a href="https://arxiv.org/pdf/2404.13748">pdf</a>, <a href="https://arxiv.org/format/2404.13748">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> </div> </div> <p class="title is-5 mathjax"> Application of Kalman Filter in Stochastic Differential Equations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Bao%2C+W">Wencheng Bao</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shi Feng</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+K">Kaiwen Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.13748v1-abstract-short" style="display: inline;"> In areas such as finance, engineering, and science, we often face situations that change quickly and unpredictably. These situations are tough to handle and require special tools and methods capable of understanding and predicting what might happen next. Stochastic Differential Equations (SDEs) are renowned for modeling and analyzing real-world dynamical systems. 
However, obtaining the parameters,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.13748v1-abstract-full').style.display = 'inline'; document.getElementById('2404.13748v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.13748v1-abstract-full" style="display: none;"> In areas such as finance, engineering, and science, we often face situations that change quickly and unpredictably. These situations are tough to handle and require special tools and methods capable of understanding and predicting what might happen next. Stochastic Differential Equations (SDEs) are renowned for modeling and analyzing real-world dynamical systems. However, obtaining the parameters, boundary conditions, and closed-form solutions of SDEs can often be challenging. In this paper, we will discuss the application of Kalman filtering theory to SDEs, including Extended Kalman filtering and Particle Extended Kalman filtering. We will explore how to fit existing SDE systems through filtering and track the original SDEs by fitting the obtained closed-form solutions. This approach aims to gather more information about these SDEs, which could be used in various ways, such as incorporating them into parameters of data-based SDE models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.13748v1-abstract-full').style.display = 'none'; document.getElementById('2404.13748v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 14 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.09500">arXiv:2404.09500</a> <span> [<a href="https://arxiv.org/pdf/2404.09500">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optics">physics.optics</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> On-chip Real-time Hyperspectral Imager with Full CMOS Resolution Enabled by Massively Parallel Neural Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wen%2C+J">Junren Wen</a>, <a href="/search/eess?searchtype=author&query=Gao%2C+H">Haiqi Gao</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+W">Weiming Shi</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shuaibo Feng</a>, <a href="/search/eess?searchtype=author&query=Hao%2C+L">Lingyun Hao</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Y">Yujie Liu</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+L">Liang Xu</a>, <a href="/search/eess?searchtype=author&query=Shao%2C+Y">Yuchuan Shao</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Y">Yueguang Zhang</a>, <a href="/search/eess?searchtype=author&query=Shen%2C+W">Weidong Shen</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+C">Chenying Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.09500v1-abstract-short" style="display: inline;"> Traditional spectral imaging methods are constrained by the time-consuming 
scanning process, limiting the application in dynamic scenarios. One-shot spectral imaging based on reconstruction has been a hot research topic recently and the primary challenges still lie in both efficient fabrication techniques suitable for mass production and the high-speed, high-accuracy reconstruction algorithm for r… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.09500v1-abstract-full').style.display = 'inline'; document.getElementById('2404.09500v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.09500v1-abstract-full" style="display: none;"> Traditional spectral imaging methods are constrained by the time-consuming scanning process, limiting the application in dynamic scenarios. One-shot spectral imaging based on reconstruction has been a hot research topic recently and the primary challenges still lie in both efficient fabrication techniques suitable for mass production and the high-speed, high-accuracy reconstruction algorithm for real-time spectral imaging. In this study, we introduce an innovative on-chip real-time hyperspectral imager that leverages nanophotonic film spectral encoders and a Massively Parallel Network (MP-Net), featuring a 4 * 4 array of compact, all-dielectric film units for the micro-spectrometers. Each curved nanophotonic film unit uniquely modulates incident light across the underlying 3 * 3 CMOS image sensor (CIS) pixels, enabling a high spatial resolution equivalent to the full CMOS resolution. The implementation of MP-Net, specially designed to address variability in transmittance and manufacturing errors such as misalignment and non-uniformities in thin film deposition, can greatly increase the structural tolerance of the device and reduce the preparation requirement, further simplifying the manufacturing process. 
Tested in varied environments on both static and moving objects, the real-time hyperspectral imager demonstrates the robustness and high-fidelity spatial-spectral data capabilities across diverse scenarios. This on-chip hyperspectral imager represents a significant advancement in real-time, high-resolution spectral imaging, offering a versatile solution for applications ranging from environmental monitoring and remote sensing to consumer electronics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.09500v1-abstract-full').style.display = 'none'; document.getElementById('2404.09500v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.07959">arXiv:2404.07959</a> <span> [<a href="https://arxiv.org/pdf/2404.07959">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Damage identification of offshore jacket platforms in a digital twin framework considering optimal sensor placement </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+M">Mengmeng Wang</a>, <a href="/search/eess?searchtype=author&query=Incecik%2C+A">Atilla Incecik</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shizhe Feng</a>, <a href="/search/eess?searchtype=author&query=Gupta%2C+M+K">M. K. 
Gupta</a>, <a href="/search/eess?searchtype=author&query=Krlolczyk%2C+G">Grzegorz Krlolczyk</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Z">Z Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.07959v1-abstract-short" style="display: inline;"> A new digital twin (DT) framework with optimal sensor placement (OSP) is proposed to accurately calculate the modal responses and identify the damage ratios of the offshore jacket platforms. The proposed damage identification framework consists of two models (namely one OSP model and one damage identification model). The OSP model adopts the multi-objective Lichtenberg algorithm (MOLA) to perform… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.07959v1-abstract-full').style.display = 'inline'; document.getElementById('2404.07959v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.07959v1-abstract-full" style="display: none;"> A new digital twin (DT) framework with optimal sensor placement (OSP) is proposed to accurately calculate the modal responses and identify the damage ratios of the offshore jacket platforms. The proposed damage identification framework consists of two models (namely one OSP model and one damage identification model). The OSP model adopts the multi-objective Lichtenberg algorithm (MOLA) to perform the sensor number/location optimization to make a good balance between the sensor cost and the modal calculation accuracy. In the damage identification model, the Markov Chain Monte Carlo (MCMC)-Bayesian method is developed to calculate the structural damage ratios based on the modal information obtained from the sensory measurements, where the uncertainties of the structural parameters are quantified. 
The proposed method is validated using an offshore jacket platform, and the analysis results demonstrate efficient identification of the structural damage location and severity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.07959v1-abstract-full').style.display = 'none'; document.getElementById('2404.07959v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.19275">arXiv:2402.19275</a> <span> [<a href="https://arxiv.org/pdf/2402.19275">pdf</a>, <a href="https://arxiv.org/format/2402.19275">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Testing Environment Generation for Connected and Automated Vehicles with Dense Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Yang%2C+J">Jingxuan Yang</a>, <a href="/search/eess?searchtype=author&query=Bai%2C+R">Ruoxuan Bai</a>, <a href="/search/eess?searchtype=author&query=Ji%2C+H">Haoyuan Ji</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Y">Yi Zhang</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+J">Jianming Hu</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shuo Feng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2402.19275v1-abstract-short" style="display: inline;"> The assessment of safety performance plays a pivotal role in the development and deployment of connected and automated vehicles (CAVs). A common approach involves designing testing scenarios based on prior knowledge of CAVs (e.g., surrogate models), conducting tests in these scenarios, and subsequently evaluating CAVs' safety performances. However, substantial differences between CAVs and the prio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.19275v1-abstract-full').style.display = 'inline'; document.getElementById('2402.19275v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.19275v1-abstract-full" style="display: none;"> The assessment of safety performance plays a pivotal role in the development and deployment of connected and automated vehicles (CAVs). A common approach involves designing testing scenarios based on prior knowledge of CAVs (e.g., surrogate models), conducting tests in these scenarios, and subsequently evaluating CAVs' safety performances. However, substantial differences between CAVs and the prior knowledge can significantly diminish the evaluation efficiency. In response to this issue, existing studies predominantly concentrate on the adaptive design of testing scenarios during the CAV testing process. Yet, these methods have limitations in their applicability to high-dimensional scenarios. To overcome this challenge, we develop an adaptive testing environment that bolsters evaluation robustness by incorporating multiple surrogate models and optimizing the combination coefficients of these surrogate models to enhance evaluation efficiency. We formulate the optimization problem as a regression task utilizing quadratic programming. 
To efficiently obtain the regression target via reinforcement learning, we propose the dense reinforcement learning method and devise a new adaptive policy with high sample efficiency. Essentially, our approach centers on learning the values of critical scenes displaying substantial surrogate-to-real gaps. The effectiveness of our method is validated in high-dimensional overtaking scenarios, demonstrating that our approach achieves notable evaluation efficiency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.19275v1-abstract-full').style.display = 'none'; document.getElementById('2402.19275v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.01795">arXiv:2402.01795</a> <span> [<a href="https://arxiv.org/pdf/2402.01795">pdf</a>, <a href="https://arxiv.org/format/2402.01795">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/IV55156.2024.10588417">10.1109/IV55156.2024.10588417 <i class="fa fa-external-link" 
aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Few-Shot Scenario Testing for Autonomous Vehicles Based on Neighborhood Coverage and Similarity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+S">Shu Li</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+J">Jingxuan Yang</a>, <a href="/search/eess?searchtype=author&query=He%2C+H">Honglin He</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Y">Yi Zhang</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+J">Jianming Hu</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shuo Feng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.01795v2-abstract-short" style="display: inline;"> Testing and evaluating the safety performance of autonomous vehicles (AVs) is essential before the large-scale deployment. Practically, the number of testing scenarios permissible for a specific AV is severely limited by tight constraints on testing budgets and time. With the restrictions imposed by strictly restricted numbers of tests, existing testing methods often lead to significant uncertaint… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.01795v2-abstract-full').style.display = 'inline'; document.getElementById('2402.01795v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.01795v2-abstract-full" style="display: none;"> Testing and evaluating the safety performance of autonomous vehicles (AVs) is essential before the large-scale deployment. Practically, the number of testing scenarios permissible for a specific AV is severely limited by tight constraints on testing budgets and time. 
With the restrictions imposed by strictly restricted numbers of tests, existing testing methods often lead to significant uncertainty or difficulty to quantifying evaluation results. In this paper, we formulate this problem for the first time the "few-shot testing" (FST) problem and propose a systematic framework to address this challenge. To alleviate the considerable uncertainty inherent in a small testing scenario set, we frame the FST problem as an optimization problem and search for the testing scenario set based on neighborhood coverage and similarity. Specifically, under the guidance of better generalization ability of the testing scenario set on AVs, we dynamically adjust this set and the contribution of each testing scenario to the evaluation result based on coverage, leveraging the prior information of surrogate models (SMs). With certain hypotheses on SMs, a theoretical upper bound of evaluation error is established to verify the sufficiency of evaluation accuracy within the given limited number of tests. The experiment results on cut-in scenarios demonstrate a notable reduction in evaluation error and variance of our method compared to conventional testing methods, especially for situations with a strict limit on the number of scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.01795v2-abstract-full').style.display = 'none'; document.getElementById('2402.01795v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Intelligent Vehicle Symposium 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.15416">arXiv:2312.15416</a> <span> [<a href="https://arxiv.org/pdf/2312.15416">pdf</a>, <a href="https://arxiv.org/format/2312.15416">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> On Completeness of SDP-Based Barrier Certificate Synthesis over Unbounded Domains </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wu%2C+H">Hao Wu</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shenghua Feng</a>, <a href="/search/eess?searchtype=author&query=Gan%2C+T">Ting Gan</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+J">Jie Wang</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+B">Bican Xia</a>, <a href="/search/eess?searchtype=author&query=Zhan%2C+N">Naijun Zhan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.15416v4-abstract-short" style="display: inline;"> Barrier certificates, serving as differential invariants that witness system safety, play a crucial role in the verification of cyber-physical systems (CPS). Prevailing computational methods for synthesizing barrier certificates are based on semidefinite programming (SDP) by exploiting Putinar Positivstellensatz. 
Consequently, these approaches are limited by the Archimedean condition, which requir… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.15416v4-abstract-full').style.display = 'inline'; document.getElementById('2312.15416v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.15416v4-abstract-full" style="display: none;"> Barrier certificates, serving as differential invariants that witness system safety, play a crucial role in the verification of cyber-physical systems (CPS). Prevailing computational methods for synthesizing barrier certificates are based on semidefinite programming (SDP) by exploiting Putinar Positivstellensatz. Consequently, these approaches are limited by the Archimedean condition, which requires all variables to be bounded, i.e., systems are defined over bounded domains. For systems over unbounded domains, unfortunately, existing methods become incomplete and may fail to identify potential barrier certificates. In this paper, we address this limitation for the unbounded cases. We first give a complete characterization of polynomial barrier certificates by using homogenization, a recent technique in the optimization community to reduce an unbounded optimization problem to a bounded one. Furthermore, motivated by this formulation, we introduce the definition of homogenized systems and propose a complete characterization of a family of non-polynomial barrier certificates with more expressive power. Experimental results demonstrate that our two approaches are more effective while maintaining a comparable level of efficiency. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.15416v4-abstract-full').style.display = 'none'; document.getElementById('2312.15416v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by the 26th international symposium on Formal Methods (FM2024). 18 pages, 1 figure. Updated on 2024.7.9, fix two typos in Lemma 1 and Equation 10</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.07418">arXiv:2311.07418</a> <span> [<a href="https://arxiv.org/pdf/2311.07418">pdf</a>, <a href="https://arxiv.org/format/2311.07418">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Speech-based Slot Filling using Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Sun%2C+G">Guangzhi Sun</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shutong Feng</a>, <a href="/search/eess?searchtype=author&query=Jiang%2C+D">Dongcheng Jiang</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+C">Chao Zhang</a>, <a 
href="/search/eess?searchtype=author&query=Ga%C5%A1i%C4%87%2C+M">Milica Gašić</a>, <a href="/search/eess?searchtype=author&query=Woodland%2C+P+C">Philip C. Woodland</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.07418v1-abstract-short" style="display: inline;"> Recently, advancements in large language models (LLMs) have shown an unprecedented ability across various language tasks. This paper investigates the potential application of LLMs to slot filling with noisy ASR transcriptions, via both in-context learning and task-specific fine-tuning. Dedicated prompt designs and fine-tuning approaches are proposed to improve the robustness of LLMs for slot filli… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.07418v1-abstract-full').style.display = 'inline'; document.getElementById('2311.07418v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.07418v1-abstract-full" style="display: none;"> Recently, advancements in large language models (LLMs) have shown an unprecedented ability across various language tasks. This paper investigates the potential application of LLMs to slot filling with noisy ASR transcriptions, via both in-context learning and task-specific fine-tuning. Dedicated prompt designs and fine-tuning approaches are proposed to improve the robustness of LLMs for slot filling with noisy ASR transcriptions. Moreover, a linearised knowledge injection (LKI) scheme is also proposed to integrate dynamic external knowledge into LLMs. Experiments were performed on SLURP to quantify the performance of LLMs, including GPT-3.5-turbo, GPT-4, LLaMA-13B and Vicuna-13B (v1.1 and v1.5) with different ASR error rates. 
The use of the proposed fine-tuning together with the LKI scheme for LLaMA-13B achieved an 8.3% absolute SLU-F1 improvement compared to the strong Flan-T5-base baseline system on a limited data setup. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.07418v1-abstract-full').style.display = 'none'; document.getElementById('2311.07418v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.00263">arXiv:2311.00263</a> <span> [<a href="https://arxiv.org/pdf/2311.00263">pdf</a>, <a href="https://arxiv.org/format/2311.00263">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> The bottleneck and ceiling effects in quantized tracking control of heterogeneous multi-agent systems under DoS attacks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shuai Feng</a>, <a href="/search/eess?searchtype=author&query=Ran%2C+M">Maopeng Ran</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+B">Baoyong Zhang</a>, <a href="/search/eess?searchtype=author&query=Xie%2C+L">Lihua Xie</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+S">Shengyuan Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.00263v1-abstract-short" style="display: inline;"> In this paper, we investigate tracking control of heterogeneous multi-agent 
systems under Denial-of-Service (DoS) attacks and state quantization. Dynamic quantized mechanisms are designed for inter-follower communication and leader-follower communication. Zooming-in and out factors, and data rates of both mechanisms for preventing quantizer saturation are provided. Our results show that by tuning… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.00263v1-abstract-full').style.display = 'inline'; document.getElementById('2311.00263v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.00263v1-abstract-full" style="display: none;"> In this paper, we investigate tracking control of heterogeneous multi-agent systems under Denial-of-Service (DoS) attacks and state quantization. Dynamic quantized mechanisms are designed for inter-follower communication and leader-follower communication. Zooming-in and out factors, and data rates of both mechanisms for preventing quantizer saturation are provided. Our results show that by tuning the inter-follower quantized controller, one cannot improve the resilience beyond a level determined by the data rate of leader-follower quantized communication, i.e., the ceiling effect. Otherwise, overflow of followers' state quantizer can occur. On the other hand, if one selects a "large" data rate for leader-follower quantized communication, then the inter-follower quantized communication determines the resilience, and further increasing the data rate for leader-follower quantized communication cannot improve the resilience, i.e., the bottleneck effect. Simulation examples are provided to justify the results of our paper. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.00263v1-abstract-full').style.display = 'none'; document.getElementById('2311.00263v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.05908">arXiv:2309.05908</a> <span> [<a href="https://arxiv.org/pdf/2309.05908">pdf</a>, <a href="https://arxiv.org/format/2309.05908">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Reset Controller Synthesis by Reach-avoid Analysis for Delay Hybrid Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Su%2C+H">Han Su</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+J">Jiyu Zhu</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shenghua Feng</a>, <a href="/search/eess?searchtype=author&query=Bai%2C+Y">Yunjun Bai</a>, <a href="/search/eess?searchtype=author&query=Gu%2C+B">Bin Gu</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+J">Jiang Liu</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+M">Mengfei Yang</a>, <a href="/search/eess?searchtype=author&query=Zhan%2C+N">Naijun Zhan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.05908v2-abstract-short" style="display: inline;"> A reset controller plays a crucial role in designing hybrid systems. 
It restricts the initial set and redefines the reset map associated with discrete transitions, in order to guarantee the system to achieve its objective. Reset controller synthesis, together with feedback controller synthesis and switching logic controller synthesis, provides a correct-by-construction approach to designing hybrid… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.05908v2-abstract-full').style.display = 'inline'; document.getElementById('2309.05908v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.05908v2-abstract-full" style="display: none;"> A reset controller plays a crucial role in designing hybrid systems. It restricts the initial set and redefines the reset map associated with discrete transitions, in order to guarantee the system to achieve its objective. Reset controller synthesis, together with feedback controller synthesis and switching logic controller synthesis, provides a correct-by-construction approach to designing hybrid systems. However, time-delay is an inevitable factor in hybrid systems, which can degrade control performance and render verification certificates obtained by abstracting away time-delay invalid in practice. In this paper, we investigate this issue in a practical manner by taking time-delay into account. We propose an approach that reduces the synthesis of reset controllers to the generation of reach-avoid sets for the hybrid system under consideration, which can be efficiently solved using off-the-shelf convex optimization solvers. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.05908v2-abstract-full').style.display = 'none'; document.getElementById('2309.05908v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 10 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.12617">arXiv:2308.12617</a> <span> [<a href="https://arxiv.org/pdf/2308.12617">pdf</a>, <a href="https://arxiv.org/ps/2308.12617">ps</a>, <a href="https://arxiv.org/format/2308.12617">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> Quantized distributed Nash equilibrium seeking under DoS attacks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shuai Feng</a>, <a href="/search/eess?searchtype=author&query=Ye%2C+M">Maojiao Ye</a>, <a href="/search/eess?searchtype=author&query=Xie%2C+L">Lihua Xie</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+S">Shengyuan Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.12617v3-abstract-short" 
style="display: inline;"> This paper studies distributed Nash equilibrium (NE) seeking under Denial-of-Service (DoS) attacks and quantization. The players can only exchange information with their own direct neighbors. The transmitted information is subject to quantization and packet losses induced by malicious DoS attacks. We propose a quantized distributed NE seeking strategy based on the approach of dynamic quantized con… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.12617v3-abstract-full').style.display = 'inline'; document.getElementById('2308.12617v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.12617v3-abstract-full" style="display: none;"> This paper studies distributed Nash equilibrium (NE) seeking under Denial-of-Service (DoS) attacks and quantization. The players can only exchange information with their own direct neighbors. The transmitted information is subject to quantization and packet losses induced by malicious DoS attacks. We propose a quantized distributed NE seeking strategy based on the approach of dynamic quantized consensus. To solve the quantizer saturation problem caused by DoS attacks, the quantization mechanism is equipped to have zooming-in and holding capabilities, in which the holding capability is consistent with the results in quantized consensus under DoS. A sufficient condition on the number of quantizer levels is provided, under which the quantizers are free from saturation under DoS attacks. The proposed distributed quantized NE seeking strategy is shown to have the so-called maximum resilience to DoS attacks. Namely, if the bound characterizing the maximum resilience is violated, an attacker can deny all the transmissions and hence distributed NE seeking is impossible. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.12617v3-abstract-full').style.display = 'none'; document.getElementById('2308.12617v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.15388">arXiv:2307.15388</a> <span> [<a href="https://arxiv.org/pdf/2307.15388">pdf</a>, <a href="https://arxiv.org/format/2307.15388">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Geophysics">physics.geo-ph</span> </div> </div> <p class="title is-5 mathjax"> An Empirical Study of Large-Scale Data-Driven Full Waveform Inversion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Jin%2C+P">Peng Jin</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+Y">Yinan Feng</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shihang Feng</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+H">Hanchen Wang</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yinpeng Chen</a>, <a href="/search/eess?searchtype=author&query=Consolvo%2C+B">Benjamin Consolvo</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Z">Zicheng Liu</a>, <a href="/search/eess?searchtype=author&query=Lin%2C+Y">Youzuo Lin</a> </p> <p class="abstract 
mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.15388v2-abstract-short" style="display: inline;"> This paper investigates the impact of big data on deep learning models to help solve the full waveform inversion (FWI) problem. While it is well known that big data can boost the performance of deep learning models in many tasks, its effectiveness has not been validated for FWI. To address this gap, we present an empirical study that investigates how deep learning models in FWI behave when trained… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.15388v2-abstract-full').style.display = 'inline'; document.getElementById('2307.15388v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.15388v2-abstract-full" style="display: none;"> This paper investigates the impact of big data on deep learning models to help solve the full waveform inversion (FWI) problem. While it is well known that big data can boost the performance of deep learning models in many tasks, its effectiveness has not been validated for FWI. To address this gap, we present an empirical study that investigates how deep learning models in FWI behave when trained on OpenFWI, a collection of large-scale, multi-structural, synthetic datasets published recently. In particular, we train and evaluate the FWI models on a combination of 10 2D subsets in OpenFWI that contain 470K pairs of seismic data and velocity maps in total. Our experiments demonstrate that training on the combined dataset yields an average improvement of 13.03% in MAE, 7.19% in MSE and 1.87% in SSIM compared to each split dataset, and an average improvement of 28.60%, 21.55% and 8.22% in the leave-one-out generalization test. 
We further demonstrate that model capacity needs to scale in accordance with data size for optimal improvement, where our largest model yields an average improvement of 20.06%, 13.39% and 0.72% compared to the smallest one. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.15388v2-abstract-full').style.display = 'none'; document.getElementById('2307.15388v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.02982">arXiv:2306.02982</a> <span> [<a href="https://arxiv.org/pdf/2306.02982">pdf</a>, <a href="https://arxiv.org/format/2306.02982">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> PolyVoice: Language Models for Speech to Speech Translation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Dong%2C+Q">Qianqian Dong</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+Z">Zhiying Huang</a>, <a href="/search/eess?searchtype=author&query=Tian%2C+Q">Qiao Tian</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+C">Chen Xu</a>, <a href="/search/eess?searchtype=author&query=Ko%2C+T">Tom Ko</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+Y">Yunlong Zhao</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Siyuan 
Feng</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tang Li</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+K">Kexin Wang</a>, <a href="/search/eess?searchtype=author&query=Cheng%2C+X">Xuxin Cheng</a>, <a href="/search/eess?searchtype=author&query=Yue%2C+F">Fengpeng Yue</a>, <a href="/search/eess?searchtype=author&query=Bai%2C+Y">Ye Bai</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+X">Xi Chen</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+L">Lu Lu</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+Z">Zejun Ma</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yuping Wang</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+M">Mingxuan Wang</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yuxuan Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.02982v2-abstract-short" style="display: inline;"> We propose PolyVoice, a language model-based framework for speech-to-speech translation (S2ST) system. Our framework consists of two language models: a translation language model and a speech synthesis language model. We use discretized speech units, which are generated in a fully unsupervised way, and thus our framework can be used for unwritten languages. For the speech synthesis part, we adopt… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.02982v2-abstract-full').style.display = 'inline'; document.getElementById('2306.02982v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.02982v2-abstract-full" style="display: none;"> We propose PolyVoice, a language model-based framework for speech-to-speech translation (S2ST) system. Our framework consists of two language models: a translation language model and a speech synthesis language model. 
We use discretized speech units, which are generated in a fully unsupervised way, and thus our framework can be used for unwritten languages. For the speech synthesis part, we adopt the existing VALL-E X approach and build a unit-based audio language model. This grants our framework the ability to preserve the voice characteristics and the speaking style of the original speech. We examine our system on Chinese $\rightarrow$ English and English $\rightarrow$ Spanish pairs. Experimental results show that our system can generate speech with high translation quality and audio quality. Speech samples are available at https://speechtranslation.github.io/polyvoice. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.02982v2-abstract-full').style.display = 'none'; document.getElementById('2306.02982v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.00279">arXiv:2306.00279</a> <span> [<a href="https://arxiv.org/pdf/2306.00279">pdf</a>, <a href="https://arxiv.org/format/2306.00279">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TCNS.2023.3281555">10.1109/TCNS.2023.3281555 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Dynamic quantized consensus under DoS attacks: Towards a tight zooming-out factor </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shuai Feng</a>, <a href="/search/eess?searchtype=author&query=Ran%2C+M">Maopeng Ran</a>, <a href="/search/eess?searchtype=author&query=Ishii%2C+H">Hideaki Ishii</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+S">Shengyuan Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.00279v1-abstract-short" style="display: inline;"> This paper deals with dynamic quantized consensus of dynamical agents in a general form under packet losses induced by Denial-of-Service (DoS) attacks. The communication channel has limited bandwidth and hence the transmitted signals over the network are subject to quantization. To deal with agent's output, an observer is implemented at each node. 
The state of the observer is quantized by a finite… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.00279v1-abstract-full').style.display = 'inline'; document.getElementById('2306.00279v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.00279v1-abstract-full" style="display: none;"> This paper deals with dynamic quantized consensus of dynamical agents in a general form under packet losses induced by Denial-of-Service (DoS) attacks. The communication channel has limited bandwidth and hence the transmitted signals over the network are subject to quantization. To deal with agent's output, an observer is implemented at each node. The state of the observer is quantized by a finite-level quantizer and then transmitted over the network. To solve the problem of quantizer overflow under malicious packet losses, a zooming-in and out dynamic quantization mechanism is designed. By the new quantized controller proposed in the paper, the zooming-out factor is lower bounded by the spectral radius of the agent's dynamic matrix. A sufficient condition of quantization range is provided under which the finite-level quantizer is free of overflow. A sufficient condition of tolerable DoS attacks for achieving consensus is also provided. At last, we study scalar dynamical agents as a special case and further tighten the zooming-out factor to a value smaller than the agent's dynamic parameter. Under such a zooming-out factor, it is possible to recover the level of tolerable DoS attacks to that of unquantized consensus, and the quantizer is free of overflow. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.00279v1-abstract-full').style.display = 'none'; document.getElementById('2306.00279v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.15719">arXiv:2305.15719</a> <span> [<a href="https://arxiv.org/pdf/2305.15719">pdf</a>, <a href="https://arxiv.org/format/2305.15719">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Efficient Neural Music Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Lam%2C+M+W+Y">Max W. Y. 
Lam</a>, <a href="/search/eess?searchtype=author&query=Tian%2C+Q">Qiao Tian</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tang Li</a>, <a href="/search/eess?searchtype=author&query=Yin%2C+Z">Zongyu Yin</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Siyuan Feng</a>, <a href="/search/eess?searchtype=author&query=Tu%2C+M">Ming Tu</a>, <a href="/search/eess?searchtype=author&query=Ji%2C+Y">Yuliang Ji</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+R">Rui Xia</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+M">Mingbo Ma</a>, <a href="/search/eess?searchtype=author&query=Song%2C+X">Xuchen Song</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+J">Jitong Chen</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yuping Wang</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yuxuan Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.15719v1-abstract-short" style="display: inline;"> Recent progress in music generation has been remarkably advanced by the state-of-the-art MusicLM, which comprises a hierarchy of three LMs, respectively, for semantic, coarse acoustic, and fine acoustic modelings. 
Yet, sampling with the MusicLM requires processing through these LMs one by one to obtain the fine-grained acoustic tokens, making it computationally expensive and prohibitive for a real… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.15719v1-abstract-full').style.display = 'inline'; document.getElementById('2305.15719v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.15719v1-abstract-full" style="display: none;"> Recent progress in music generation has been remarkably advanced by the state-of-the-art MusicLM, which comprises a hierarchy of three LMs, respectively, for semantic, coarse acoustic, and fine acoustic modelings. Yet, sampling with the MusicLM requires processing through these LMs one by one to obtain the fine-grained acoustic tokens, making it computationally expensive and prohibitive for a real-time generation. Efficient music generation with a quality on par with MusicLM remains a significant challenge. In this paper, we present MeLoDy (M for music; L for LM; D for diffusion), an LM-guided diffusion model that generates music audios of state-of-the-art quality meanwhile reducing 95.7% or 99.6% forward passes in MusicLM, respectively, for sampling 10s or 30s music. MeLoDy inherits the highest-level LM from MusicLM for semantic modeling, and applies a novel dual-path diffusion (DPD) model and an audio VAE-GAN to efficiently decode the conditioning semantic tokens into waveform. DPD is proposed to simultaneously model the coarse and fine acoustics by incorporating the semantic information into segments of latents effectively via cross-attention at each denoising step. Our experimental results suggest the superiority of MeLoDy, not only in its practical advantages on sampling speed and infinitely continuable generation, but also in its state-of-the-art musicality, audio quality, and text correlation. 
Our samples are available at https://Efficient-MeLoDy.github.io/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.15719v1-abstract-full').style.display = 'none'; document.getElementById('2305.15719v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.13314">arXiv:2305.13314</a> <span> [<a href="https://arxiv.org/pdf/2305.13314">pdf</a>, <a href="https://arxiv.org/format/2305.13314">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Geophysics">physics.geo-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Auto-Linear Phenomenon in Subsurface Imaging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Feng%2C+Y">Yinan Feng</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yinpeng Chen</a>, <a href="/search/eess?searchtype=author&query=Jin%2C+P">Peng Jin</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shihang Feng</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Z">Zicheng Liu</a>, <a href="/search/eess?searchtype=author&query=Lin%2C+Y">Youzuo Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.13314v3-abstract-short" style="display: inline;"> Subsurface imaging 
involves solving full waveform inversion (FWI) to predict geophysical properties from measurements. This problem can be reframed as an image-to-image translation, with the usual approach being to train an encoder-decoder network using paired data from two domains: geophysical property and measurement. A recent seminal work (InvLINT) demonstrates there is only a linear mapping be… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.13314v3-abstract-full').style.display = 'inline'; document.getElementById('2305.13314v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.13314v3-abstract-full" style="display: none;"> Subsurface imaging involves solving full waveform inversion (FWI) to predict geophysical properties from measurements. This problem can be reframed as an image-to-image translation, with the usual approach being to train an encoder-decoder network using paired data from two domains: geophysical property and measurement. A recent seminal work (InvLINT) demonstrates there is only a linear mapping between the latent spaces of the two domains, and the decoder requires paired data for training. This paper extends this direction by demonstrating that only linear mapping necessitates paired data, while both the encoder and decoder can be learned from their respective domains through self-supervised learning. This unveils an intriguing phenomenon (named Auto-Linear) where the self-learned features of two separate domains are automatically linearly correlated. 
Compared with existing methods, our Auto-Linear has four advantages: (a) solving both forward and inverse modeling simultaneously, (b) applicable to different subsurface imaging tasks and achieving markedly better results than previous methods, (c) enhanced performance, especially in scenarios with limited paired data and in the presence of noisy data, and (d) strong generalization ability of the trained encoder and decoder. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.13314v3-abstract-full').style.display = 'none'; document.getElementById('2305.13314v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.11576">arXiv:2305.11576</a> <span> [<a href="https://arxiv.org/pdf/2305.11576">pdf</a>, <a href="https://arxiv.org/format/2305.11576">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Language-universal phonetic encoder for low-resource speech recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Feng%2C+S">Siyuan Feng</a>, <a href="/search/eess?searchtype=author&query=Tu%2C+M">Ming Tu</a>, <a
href="/search/eess?searchtype=author&query=Xia%2C+R">Rui Xia</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+C">Chuanzeng Huang</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yuxuan Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.11576v1-abstract-short" style="display: inline;"> Multilingual training is effective in improving low-resource ASR, which may partially be explained by phonetic representation sharing between languages. In end-to-end (E2E) ASR systems, graphemes are often used as basic modeling units, however graphemes may not be ideal for multilingual phonetic sharing. In this paper, we leverage International Phonetic Alphabet (IPA) based language-universal phon… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.11576v1-abstract-full').style.display = 'inline'; document.getElementById('2305.11576v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.11576v1-abstract-full" style="display: none;"> Multilingual training is effective in improving low-resource ASR, which may partially be explained by phonetic representation sharing between languages. In end-to-end (E2E) ASR systems, graphemes are often used as basic modeling units, however graphemes may not be ideal for multilingual phonetic sharing. In this paper, we leverage International Phonetic Alphabet (IPA) based language-universal phonetic model to improve low-resource ASR performances, for the first time within the attention encoder-decoder architecture. We propose an adaptation method on the phonetic IPA model to further improve the proposed approach on extreme low-resource languages. 
Experiments carried out on the open-source MLS corpus and our internal databases show our approach outperforms baseline monolingual models and most state-of-the-art works. Our main approach and adaptation are effective on extremely low-resource languages, even within domain- and language-mismatched scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.11576v1-abstract-full').style.display = 'none'; document.getElementById('2305.11576v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for publication in INTERSPEECH 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.11569">arXiv:2305.11569</a> <span> [<a href="https://arxiv.org/pdf/2305.11569">pdf</a>, <a href="https://arxiv.org/ps/2305.11569">ps</a>, <a href="https://arxiv.org/format/2305.11569">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Language-Universal Phonetic Representation in Multilingual Speech Pretraining for Low-Resource Speech Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Feng%2C+S">Siyuan Feng</a>, <a 
href="/search/eess?searchtype=author&query=Tu%2C+M">Ming Tu</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+R">Rui Xia</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+C">Chuanzeng Huang</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yuxuan Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.11569v1-abstract-short" style="display: inline;"> We improve low-resource ASR by integrating the ideas of multilingual training and self-supervised learning. Concretely, we leverage an International Phonetic Alphabet (IPA) multilingual model to create frame-level pseudo labels for unlabeled speech, and use these pseudo labels to guide hidden-unit BERT (HuBERT) based speech pretraining in a phonetically-informed manner. The experiments on the Mult… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.11569v1-abstract-full').style.display = 'inline'; document.getElementById('2305.11569v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.11569v1-abstract-full" style="display: none;"> We improve low-resource ASR by integrating the ideas of multilingual training and self-supervised learning. Concretely, we leverage an International Phonetic Alphabet (IPA) multilingual model to create frame-level pseudo labels for unlabeled speech, and use these pseudo labels to guide hidden-unit BERT (HuBERT) based speech pretraining in a phonetically-informed manner. The experiments on the Multilingual Speech (MLS) Corpus show that the proposed approach consistently outperforms the standard HuBERT on all the target languages. Moreover, on 3 of the 4 languages, comparing to the standard HuBERT, the approach performs better, meanwhile is able to save supervised training data by 1.5k hours (75%) at most. 
Our approach outperforms most state-of-the-art methods, with much less pretraining data in terms of hours and language diversity. Compared to XLSR-53 and a retraining-based multilingual method, our approach performs better in both full and limited finetuning data scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.11569v1-abstract-full').style.display = 'none'; document.getElementById('2305.11569v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for publication in INTERSPEECH 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.14278">arXiv:2303.14278</a> <span> [<a href="https://arxiv.org/pdf/2303.14278">pdf</a>, <a href="https://arxiv.org/format/2303.14278">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Safe Hierarchical Navigation in Crowded Dynamic Uncertain Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+H">Hongyi Chen</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shiyu Feng</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+Y">Ye Zhao</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+C">Changliu Liu</a>, <a
href="/search/eess?searchtype=author&query=Vela%2C+P+A">Patricio A. Vela</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.14278v1-abstract-short" style="display: inline;"> This paper describes a hierarchical solution consisting of a multi-phase planner and a low-level safe controller to jointly solve the safe navigation problem in crowded, dynamic, and uncertain environments. The planner employs dynamic gap analysis and trajectory optimization to achieve collision avoidance with respect to the predicted trajectories of dynamic agents within the sensing and planning… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.14278v1-abstract-full').style.display = 'inline'; document.getElementById('2303.14278v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.14278v1-abstract-full" style="display: none;"> This paper describes a hierarchical solution consisting of a multi-phase planner and a low-level safe controller to jointly solve the safe navigation problem in crowded, dynamic, and uncertain environments. The planner employs dynamic gap analysis and trajectory optimization to achieve collision avoidance with respect to the predicted trajectories of dynamic agents within the sensing and planning horizon and with robustness to agent uncertainty. To address uncertainty over the planning horizon and real-time safety, a fast reactive safe set algorithm (SSA) is adopted, which monitors and modifies the unsafe control during trajectory tracking. Compared to other existing methods, our approach offers theoretical guarantees of safety and achieves collision-free navigation with higher probability in uncertain environments, as demonstrated in scenarios with 20 and 50 dynamic agents. 
Project website: https://hychen-naza.github.io/projects/HDAGap/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.14278v1-abstract-full').style.display = 'none'; document.getElementById('2303.14278v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.08243">arXiv:2303.08243</a> <span> [<a href="https://arxiv.org/pdf/2303.08243">pdf</a>, <a href="https://arxiv.org/format/2303.08243">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Safer Gap: A Gap-based Local Planner for Safe Navigation with Nonholonomic Mobile Robots </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shiyu Feng</a>, <a href="/search/eess?searchtype=author&query=Abuaish%2C+A">Ahmad Abuaish</a>, <a href="/search/eess?searchtype=author&query=Vela%2C+P+A">Patricio A. Vela</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.08243v1-abstract-short" style="display: inline;"> This paper extends the gap-based navigation technique in Potential Gap by guaranteeing safety for nonholonomic robots for all tiers of the local planner hierarchy, so called Safer Gap. The first tier generates a Bezier-based collision-free path through gaps. 
A subset of navigable free-space from the robot through a gap, called the keyhole, is defined to be the union of the largest collision-free d… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.08243v1-abstract-full').style.display = 'inline'; document.getElementById('2303.08243v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.08243v1-abstract-full" style="display: none;"> This paper extends the gap-based navigation technique in Potential Gap by guaranteeing safety for nonholonomic robots for all tiers of the local planner hierarchy, so called Safer Gap. The first tier generates a Bezier-based collision-free path through gaps. A subset of navigable free-space from the robot through a gap, called the keyhole, is defined to be the union of the largest collision-free disc centered on the robot and a trapezoidal region directed through the gap. It is encoded by a shallow neural network zeroing barrier function (ZBF). Nonlinear model predictive control (NMPC), with Keyhole ZBF constraints and output tracking of the Bezier path, synthesizes a safe kinematically-feasible trajectory. Low-level use of the Keyhole ZBF within a point-wise optimization-based safe control synthesis module serves as a final safety layer. Simulation and experimental validation of Safer Gap confirm its collision-free navigation properties. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.08243v1-abstract-full').style.display = 'none'; document.getElementById('2303.08243v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to IROS 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.08010">arXiv:2302.08010</a> <span> [<a href="https://arxiv.org/pdf/2302.08010">pdf</a>, <a href="https://arxiv.org/format/2302.08010">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Achieving Covert Communication in Large-Scale SWIPT-Enabled D2D Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shaohan Feng</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+X">Xiao Lu</a>, <a href="/search/eess?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/eess?searchtype=author&query=Hossain%2C+E">Ekram Hossain</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+S">Sumei Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.08010v1-abstract-short" style="display: inline;"> We aim to secure a large-scale device-to-device (D2D) network against adversaries. The D2D network underlays a downlink cellular network to reuse the cellular spectrum and is enabled for simultaneous wireless information and power transfer (SWIPT). 
In the D2D network, the transmitters communicate with the receivers, and the receivers extract information and energy from their received radio-frequen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.08010v1-abstract-full').style.display = 'inline'; document.getElementById('2302.08010v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.08010v1-abstract-full" style="display: none;"> We aim to secure a large-scale device-to-device (D2D) network against adversaries. The D2D network underlays a downlink cellular network to reuse the cellular spectrum and is enabled for simultaneous wireless information and power transfer (SWIPT). In the D2D network, the transmitters communicate with the receivers, and the receivers extract information and energy from their received radio-frequency (RF) signals. In the meantime, the adversaries aim to detect the D2D transmission. The D2D network applies power control and leverages the cellular signal to achieve covert communication (i.e., hide the presence of transmissions) so as to defend against the adversaries. We model the interaction between the D2D network and adversaries by using a two-stage Stackelberg game. Therein, the adversaries are the followers minimizing their detection errors at the lower stage and the D2D network is the leader maximizing its network utility constrained by the communication covertness and power outage at the upper stage. Both power splitting (PS)-based and time switch (TS)-based SWIPT schemes are explored. We characterize the spatial configuration of the large-scale D2D network, adversaries, and cellular network by stochastic geometry. We analyze the adversary's detection error minimization problem and adopt the Rosenbrock method to solve it, where the obtained solution is the best response from the lower stage. 
Taking into account the best response from the lower stage, we develop a bi-level algorithm to solve the D2D network's constrained network utility maximization problem and obtain the Stackelberg equilibrium. We present numerical results to reveal interesting insights. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.08010v1-abstract-full').style.display = 'none'; document.getElementById('2302.08010v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.01745">arXiv:2302.01745</a> <span> [<a href="https://arxiv.org/pdf/2302.01745">pdf</a>, <a href="https://arxiv.org/format/2302.01745">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Covert D2D Communication Underlaying Cellular Network: A System-Level Security Perspective </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shaohan Feng</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+X">Xiao Lu</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+K">Kun Zhu</a>, <a href="/search/eess?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+P">Ping Wang</a> </p> <p class="abstract mathjax"> 
<span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.01745v1-abstract-short" style="display: inline;"> In this paper, we aim to secure the D2D communication of the D2D-underlaid cellular network by leveraging covert communication to hide its presence from the vigilant adversary. In particular, there are adversaries aiming to detect D2D communications based on their received signal powers. To avoid being detected, the legitimate entity, i.e., D2D-underlaid cellular network, performs power control so… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.01745v1-abstract-full').style.display = 'inline'; document.getElementById('2302.01745v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.01745v1-abstract-full" style="display: none;"> In this paper, we aim to secure the D2D communication of the D2D-underlaid cellular network by leveraging covert communication to hide its presence from the vigilant adversary. In particular, there are adversaries aiming to detect D2D communications based on their received signal powers. To avoid being detected, the legitimate entity, i.e., D2D-underlaid cellular network, performs power control so as to hide the presence of the D2D communication. We model the combat between the adversaries and the legitimate entity as a two-stage Stackelberg game. Therein, the adversaries are the followers and aim to minimize their detection errors at the lower stage while the legitimate entity is the leader and aims to maximize its utility constrained by the D2D communication covertness and the cellular quality of service (QoS) at the upper stage. 
Different from the conventional works, the study of the combat is conducted from the system-level perspective, where the scenario that a large-scale D2D-underlaid cellular network threatened by massive spatially distributed adversaries is considered and the network spatial configuration is modeled by stochastic geometry. We obtain the adversary's optimal strategy as the best response from the lower stage and also both analytically and numerically verify its optimality. Taking into account the best response from the lower stage, we design a bi-level algorithm based on the successive convex approximation (SCA) method to search for the optimal strategy of the legitimate entity, which together with the best response from the lower stage constitute the Stackelberg equilibrium. Numerical results are presented to evaluate the network performance and reveal practical insights that instead of improving the legitimate utility by strengthening the D2D link reliability, increasing D2D transmission power will degrade it due to the security concern. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.01745v1-abstract-full').style.display = 'none'; document.getElementById('2302.01745v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.08391">arXiv:2212.08391</a> <span> [<a href="https://arxiv.org/pdf/2212.08391">pdf</a>, <a href="https://arxiv.org/ps/2212.08391">ps</a>, <a href="https://arxiv.org/format/2212.08391">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Enhanced-rate Iterative Beamformers for Active IRS-assisted Wireless Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Lin%2C+Y">Yeqing Lin</a>, <a href="/search/eess?searchtype=author&query=Shu%2C+F">Feng Shu</a>, <a href="/search/eess?searchtype=author&query=Dong%2C+R">Rongen Dong</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+R">Riqing Chen</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Siling Feng</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+W">Weiping Shi</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+J">Jing Liu</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+J">Jiangzhou Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.08391v2-abstract-short" style="display: inline;"> Compared to passive intelligent reflecting surface (IRS), active IRS is viewed as a more efficient promising technique to combat the double-fading impact in IRS-aided wireless network. 
In this paper, in order to boost the achievable rate of user in such a wireless network, three enhanced-rate iterative beamforming methods are proposed by designing the amplifying factors and the corresponding phase… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.08391v2-abstract-full').style.display = 'inline'; document.getElementById('2212.08391v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.08391v2-abstract-full" style="display: none;"> Compared to passive intelligent reflecting surface (IRS), active IRS is viewed as a more efficient promising technique to combat the double-fading impact in IRS-aided wireless network. In this paper, in order to boost the achievable rate of user in such a wireless network, three enhanced-rate iterative beamforming methods are proposed by designing the amplifying factors and the corresponding phases at active IRS. The first method, maximizing the simplified signal-to-noise ratio (Max-SSNR) is designed by omitting the cross-term in the definition of rate. Using the Rayleigh-Ritz (RR) theorem, Max-SSNR-RR is proposed to iteratively optimize the norm of beamforming vector and its associated normalized vector. In addition, generalized maximum ratio reflection (GMRR) is presented with a closed-form expression, which is motivated by the maximum ratio combining. To further improve rate, maximizing SNR (Max-SNR) is designed by fractional programming (FP), which is called Max-SNR-FP. Simulation results show that the proposed three methods make an obvious rate enhancement over Max-reflecting signal-to-noise ratio (Max-RSNR), maximum ratio reflection (MRR), selective ratio reflecting (SRR), equal gain reflection (EGR) and passive IRS, and are in increasing order of rate performance as follows: Max-SSNR-RR, GMRR, and Max-SNR-FP. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.08391v2-abstract-full').style.display = 'none'; document.getElementById('2212.08391v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.00517">arXiv:2212.00517</a> <span> [<a href="https://arxiv.org/pdf/2212.00517">pdf</a>, <a href="https://arxiv.org/format/2212.00517">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TITS.2023.3317078">10.1109/TITS.2023.3317078 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Adaptive Safety Evaluation for Connected and Automated Vehicles with Sparse Control Variates </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Yang%2C+J">Jingxuan Yang</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+H">Haowei Sun</a>, <a href="/search/eess?searchtype=author&query=He%2C+H">Honglin He</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Y">Yi Zhang</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shuo Feng</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+H+X">Henry X. 
Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.00517v1-abstract-short" style="display: inline;"> Safety performance evaluation is critical for developing and deploying connected and automated vehicles (CAVs). One prevailing way is to design testing scenarios using prior knowledge of CAVs, test CAVs in these scenarios, and then evaluate their safety performances. However, significant differences between CAVs and prior knowledge could severely reduce the evaluation efficiency. Towards addressin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.00517v1-abstract-full').style.display = 'inline'; document.getElementById('2212.00517v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.00517v1-abstract-full" style="display: none;"> Safety performance evaluation is critical for developing and deploying connected and automated vehicles (CAVs). One prevailing way is to design testing scenarios using prior knowledge of CAVs, test CAVs in these scenarios, and then evaluate their safety performances. However, significant differences between CAVs and prior knowledge could severely reduce the evaluation efficiency. Towards addressing this issue, most existing studies focus on the adaptive design of testing scenarios during the CAV testing process, but so far they cannot be applied to high-dimensional scenarios. In this paper, we focus on the adaptive safety performance evaluation by leveraging the testing results, after the CAV testing process. It can significantly improve the evaluation efficiency and be applied to high-dimensional scenarios. 
Specifically, instead of directly evaluating the unknown quantity (e.g., crash rates) of CAV safety performances, we evaluate the differences between the unknown quantity and known quantity (i.e., control variates). By leveraging the testing results, the control variates could be well designed and optimized such that the differences are close to zero, so the evaluation variance could be dramatically reduced for different CAVs. To handle the high-dimensional scenarios, we propose the sparse control variates method, where the control variates are designed only for the sparse and critical variables of scenarios. According to the number of critical variables in each scenario, the control variates are stratified into strata and optimized within each stratum using multiple linear regression techniques. We justify the proposed method's effectiveness by rigorous theoretical analysis and empirical study of high-dimensional overtaking scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.00517v1-abstract-full').style.display = 'none'; document.getElementById('2212.00517v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.10506">arXiv:2210.10506</a> <span> [<a href="https://arxiv.org/pdf/2210.10506">pdf</a>, <a href="https://arxiv.org/format/2210.10506">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Audio Tampering Detection Based on Shallow and Deep Feature Representation Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+Z">Zhifeng Wang</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+Y">Yao Yang</a>, <a href="/search/eess?searchtype=author&query=Zeng%2C+C">Chunyan Zeng</a>, <a href="/search/eess?searchtype=author&query=Kong%2C+S">Shuai Kong</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shixiong Feng</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+N">Nan Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.10506v1-abstract-short" style="display: inline;"> Digital audio tampering detection can be used to verify the authenticity of digital audio. However, most current methods use standard electronic network frequency (ENF) databases for visual comparison analysis of ENF continuity of digital audio or perform feature extraction for classification by machine learning methods. 
ENF databases are usually tricky to obtain, visual methods have weak feature… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.10506v1-abstract-full').style.display = 'inline'; document.getElementById('2210.10506v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.10506v1-abstract-full" style="display: none;"> Digital audio tampering detection can be used to verify the authenticity of digital audio. However, most current methods use standard electronic network frequency (ENF) databases for visual comparison analysis of ENF continuity of digital audio or perform feature extraction for classification by machine learning methods. ENF databases are usually tricky to obtain, visual methods have weak feature representation, and machine learning methods have more information loss in features, resulting in low detection accuracy. This paper proposes a fusion method of shallow and deep features to fully use ENF information by exploiting the complementary nature of features at different levels to more accurately describe the changes in inconsistency produced by tampering operations to raw digital audio. The method achieves 97.03% accuracy on three classic databases: Carioca 1, Carioca 2, and New Spanish. In addition, we have achieved an accuracy of 88.31% on the newly constructed database GAUDI-DI. Experimental results show that the proposed method is superior to the state-of-the-art method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.10506v1-abstract-full').style.display = 'none'; document.getElementById('2210.10506v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Audio tampering detection, 21 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.15170">arXiv:2209.15170</a> <span> [<a href="https://arxiv.org/pdf/2209.15170">pdf</a>, <a href="https://arxiv.org/format/2209.15170">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Securing Large-Scale D2D Networks Using Covert Communication and Friendly Jamming </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shaohan Feng</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+X">Xiao Lu</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+S">Sumei Sun</a>, <a href="/search/eess?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/eess?searchtype=author&query=Hossain%2C+E">Ekram Hossain</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.15170v1-abstract-short" style="display: inline;"> We exploit both covert communication and friendly jamming to propose a friendly jamming-assisted covert communication and use it to doubly secure a large-scale device-to-device (D2D) network against eavesdroppers (i.e., wardens). 
The D2D transmitters defend against the wardens by: 1) hiding their transmissions with enhanced covert communication, and 2) leveraging friendly jamming to ensure informa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.15170v1-abstract-full').style.display = 'inline'; document.getElementById('2209.15170v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.15170v1-abstract-full" style="display: none;"> We exploit both covert communication and friendly jamming to propose a friendly jamming-assisted covert communication and use it to doubly secure a large-scale device-to-device (D2D) network against eavesdroppers (i.e., wardens). The D2D transmitters defend against the wardens by: 1) hiding their transmissions with enhanced covert communication, and 2) leveraging friendly jamming to ensure information secrecy even if the D2D transmissions are detected. We model the combat between the wardens and the D2D network (the transmitters and the friendly jammers) as a two-stage Stackelberg game. Therein, the wardens are the followers at the lower stage aiming to minimize their detection errors, and the D2D network is the leader at the upper stage aiming to maximize its utility (in terms of link reliability and communication security) subject to the constraint on communication covertness. We apply stochastic geometry to model the network spatial configuration so as to conduct a system-level study. We develop a bi-level optimization algorithm to search for the equilibrium of the proposed Stackelberg game based on the successive convex approximation (SCA) method and Rosenbrock method. Numerical results reveal interesting insights. We observe that without the assistance from the jammers, it is difficult to achieve covert communication on D2D transmission. 
Moreover, we illustrate the advantages of the proposed friendly jamming-assisted covert communication by comparing it with the information-theoretical secrecy approach in terms of the secure communication probability and network utility. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.15170v1-abstract-full').style.display = 'none'; document.getElementById('2209.15170v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.00196">arXiv:2209.00196</a> <span> [<a href="https://arxiv.org/pdf/2209.00196">pdf</a>, <a href="https://arxiv.org/format/2209.00196">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optics">physics.optics</span> </div> </div> <p class="title is-5 mathjax"> Group frame neural network of moving object ghost imaging combined with frame merging algorithm </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+D">Da Chen</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shan-Guo Feng</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+H">Hua-Hua Wang</a>, <a href="/search/eess?searchtype=author&query=Cao%2C+J">Jia-Ning Cao</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Z">Zhi-Wei Zhang</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+Z">Zhi-Xin Yang</a>, <a href="/search/eess?searchtype=author&query=Yan%2C+A">Ao Yan</a>, <a 
href="/search/eess?searchtype=author&query=Gao%2C+L">Lu Gao</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Z">Ze Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.00196v1-abstract-short" style="display: inline;"> The nature of multiple samples to extract correlation information limits the applications of ghost imaging of moving objects. A novel multi-to-one neural network is proposed and the concept of "batch frame" is introduced to improve the serial imaging method. The neural network extracts more correlation information from a small number of samples, thus reducing the sampling ratio of the ghost imagin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.00196v1-abstract-full').style.display = 'inline'; document.getElementById('2209.00196v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.00196v1-abstract-full" style="display: none;"> The nature of multiple samples to extract correlation information limits the applications of ghost imaging of moving objects. A novel multi-to-one neural network is proposed and the concept of "batch frame" is introduced to improve the serial imaging method. The neural network extracts more correlation information from a small number of samples, thus reducing the sampling ratio of the ghost imaging technique. We combine the correlation characteristics between images to propose a frame merging algorithm, which eliminates the dynamic blur of high-speed moving objects and further improves the reconstruction quality of moving object images at a low sampling ratio. The experimental results are consistent with the simulation results. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.00196v1-abstract-full').style.display = 'none'; document.getElementById('2209.00196v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.12753">arXiv:2208.12753</a> <span> [<a href="https://arxiv.org/pdf/2208.12753">pdf</a>, <a href="https://arxiv.org/format/2208.12753">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Spatio-Temporal Representation Learning Enhanced Source Cell-phone Recognition from Speech Recordings </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zeng%2C+C">Chunyan Zeng</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shixiong Feng</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Z">Zhifeng Wang</a>, <a href="/search/eess?searchtype=author&query=Wan%2C+X">Xiangkui Wan</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yunfan Chen</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+N">Nan Zhao</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.12753v1-abstract-short" style="display: inline;"> The existing source cell-phone recognition method lacks the long-term feature characterization of the source device, resulting in inaccurate representation of the source cell-phone related features which leads to insufficient recognition accuracy. In this paper, we propose a source cell-phone recognition method based on spatio-temporal representation learning, which includes two main parts: extrac… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.12753v1-abstract-full').style.display = 'inline'; document.getElementById('2208.12753v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.12753v1-abstract-full" style="display: none;"> The existing source cell-phone recognition method lacks the long-term feature characterization of the source device, resulting in inaccurate representation of the source cell-phone related features which leads to insufficient recognition accuracy. In this paper, we propose a source cell-phone recognition method based on spatio-temporal representation learning, which includes two main parts: extraction of sequential Gaussian mean matrix features and construction of a recognition model based on spatio-temporal representation learning. In the feature extraction part, based on the analysis of time-series representation of recording source signals, we extract sequential Gaussian mean matrix with long-term and short-term representation ability by using the sensitivity of Gaussian mixture model to data distribution. 
In the model construction part, we design a structured spatio-temporal representation learning network C3D-BiLSTM to fully characterize the spatio-temporal information, combine 3D convolutional network and bidirectional long short-term memory network for short-term spectral information and long-time fluctuation information representation learning, and achieve accurate recognition of cell-phones by fusing spatio-temporal feature information of recording source signals. The method achieves an average accuracy of 99.03% for the closed-set recognition of 45 cell-phones under the CCNU_Mobile dataset, and 98.18% in small sample size experiments, with recognition performance better than the existing state-of-the-art methods. The experimental results show that the method exhibits excellent recognition performance in multi-class cell-phones recognition. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.12753v1-abstract-full').style.display = 'none'; document.getElementById('2208.12753v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">29 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.09259">arXiv:2207.09259</a> <span> [<a href="https://arxiv.org/pdf/2207.09259">pdf</a>, <a href="https://arxiv.org/format/2207.09259">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Testing for Connected and Automated Vehicles with Sparse Control Variates in Overtaking Scenarios </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Yang%2C+J">Jingxuan Yang</a>, <a href="/search/eess?searchtype=author&query=He%2C+H">Honglin He</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Y">Yi Zhang</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shuo Feng</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+H+X">Henry X. Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.09259v1-abstract-short" style="display: inline;"> Testing and evaluation is a critical step in the development and deployment of connected and automated vehicles (CAVs). Due to the black-box property and various types of CAVs, how to test and evaluate CAVs adaptively remains a major challenge. Many approaches have been proposed to adaptively generate testing scenarios during the testing process. 
However, most existing approaches cannot be applied… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.09259v1-abstract-full').style.display = 'inline'; document.getElementById('2207.09259v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.09259v1-abstract-full" style="display: none;"> Testing and evaluation is a critical step in the development and deployment of connected and automated vehicles (CAVs). Due to the black-box property and various types of CAVs, how to test and evaluate CAVs adaptively remains a major challenge. Many approaches have been proposed to adaptively generate testing scenarios during the testing process. However, most existing approaches cannot be applied to complex scenarios, where the variables needed to define such scenarios are high dimensional. Towards filling this gap, the adaptive testing with sparse control variates method is proposed in this paper. Instead of adaptively generating testing scenarios, our approach evaluates CAVs' performances by adaptively utilizing the testing results. Specifically, each testing result is adjusted using multiple linear regression techniques based on control variates. As the regression coefficients can be adaptively optimized for the CAV under test, using the adjusted results can reduce the estimation variance, compared with using the testing results directly. To overcome the high dimensionality challenge, sparse control variates are utilized only for the critical variables of testing scenarios. To validate the proposed method, the high-dimensional overtaking scenarios are investigated, and the results demonstrate that our approach can further accelerate the evaluation process by about 30 times. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.09259v1-abstract-full').style.display = 'none'; document.getElementById('2207.09259v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.08332">arXiv:2207.08332</a> <span> [<a href="https://arxiv.org/pdf/2207.08332">pdf</a>, <a href="https://arxiv.org/format/2207.08332">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Quantized Consensus under Data-Rate Constraints and DoS Attacks: A Zooming-In and Holding Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Ran%2C+M">Maopeng Ran</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shuai Feng</a>, <a href="/search/eess?searchtype=author&query=Li%2C+J">Juncheng Li</a>, <a href="/search/eess?searchtype=author&query=Xie%2C+L">Lihua Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.08332v1-abstract-short" style="display: inline;"> This paper is concerned with the quantized consensus problem for uncertain nonlinear multi-agent systems under data-rate constraints and Denial-of-Service (DoS) attacks. The agents are modeled in strict-feedback form with unknown nonlinear dynamics and external disturbance. 
Extended state observers (ESOs) are leveraged to estimate agents' total uncertainties along with their states. To mitigate th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.08332v1-abstract-full').style.display = 'inline'; document.getElementById('2207.08332v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.08332v1-abstract-full" style="display: none;"> This paper is concerned with the quantized consensus problem for uncertain nonlinear multi-agent systems under data-rate constraints and Denial-of-Service (DoS) attacks. The agents are modeled in strict-feedback form with unknown nonlinear dynamics and external disturbance. Extended state observers (ESOs) are leveraged to estimate agents' total uncertainties along with their states. To mitigate the effects of DoS attacks, a novel dynamic quantization with zooming-in and holding capabilities is proposed. The idea is to zoom-in and hold the variable to be quantized if the system is in the absence and presence of DoS attacks, respectively. The control protocol is given in terms of the outputs of the ESOs and the dynamic-quantization-based encoders and decoders. We show that, for a connected undirected network, the developed control protocol is capable of handling any DoS attacks inducing bounded consecutive packet losses with merely 3-level quantization. The application of the zooming-in and holding approach to known linear multi-agent systems is also discussed. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.08332v1-abstract-full').style.display = 'none'; document.getElementById('2207.08332v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.01430">arXiv:2207.01430</a> <span> [<a href="https://arxiv.org/pdf/2207.01430">pdf</a>, <a href="https://arxiv.org/ps/2207.01430">ps</a>, <a href="https://arxiv.org/format/2207.01430">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> Krasovskii and Shifted Passivity Based Output Consensus </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Kawano%2C+Y">Yu Kawano</a>, <a href="/search/eess?searchtype=author&query=Cucuzzella%2C+M">Michele Cucuzzella</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shuai Feng</a>, <a href="/search/eess?searchtype=author&query=Scherpen%2C+J+M+A">Jacquelien M. A. 
Scherpen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.01430v1-abstract-short" style="display: inline;"> Motivated by current sharing in power networks, we consider a class of output consensus (also called agreement) problems for nonlinear systems, where the consensus value is determined by external disturbances, e.g., power demand. This output consensus problem is solved by a simple distributed output feedback controller if a system is either Krasovskii or shifted passive, which is the only essentia… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.01430v1-abstract-full').style.display = 'inline'; document.getElementById('2207.01430v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.01430v1-abstract-full" style="display: none;"> Motivated by current sharing in power networks, we consider a class of output consensus (also called agreement) problems for nonlinear systems, where the consensus value is determined by external disturbances, e.g., power demand. This output consensus problem is solved by a simple distributed output feedback controller if a system is either Krasovskii or shifted passive, which is the only essential requirement. The effectiveness of the proposed controller is shown in simulation on an islanded DC power network. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.01430v1-abstract-full').style.display = 'none'; document.getElementById('2207.01430v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.00105">arXiv:2206.00105</a> <span> [<a href="https://arxiv.org/pdf/2206.00105">pdf</a>, <a href="https://arxiv.org/format/2206.00105">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.5121/csit.2022.120901">10.5121/csit.2022.120901 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Deep learning pipeline for image classification on mobile phones </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Muneeb%2C+M">Muhammad Muneeb</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S+F">Samuel F. Feng</a>, <a href="/search/eess?searchtype=author&query=Henschel%2C+A">Andreas Henschel</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.00105v1-abstract-short" style="display: inline;"> This article proposes and documents a machine-learning framework and tutorial for classifying images using mobile phones. 
Compared to computers, the performance of deep learning model performance degrades when deployed on a mobile phone and requires a systematic approach to find a model that performs optimally on both computers and mobile phones. By following the proposed pipeline, which consists… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.00105v1-abstract-full').style.display = 'inline'; document.getElementById('2206.00105v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.00105v1-abstract-full" style="display: none;"> This article proposes and documents a machine-learning framework and tutorial for classifying images using mobile phones. Compared to computers, the performance of deep learning model performance degrades when deployed on a mobile phone and requires a systematic approach to find a model that performs optimally on both computers and mobile phones. By following the proposed pipeline, which consists of various computational tools, simple procedural recipes, and technical considerations, one can bring the power of deep learning medical image classification to mobile devices, potentially unlocking new domains of applications. The pipeline is demonstrated on four different publicly available datasets: COVID X-rays, COVID CT scans, leaves, and colorectal cancer. We used two application development frameworks: TensorFlow Lite (real-time testing) and Flutter (digital image testing) to test the proposed pipeline. We found that transferring deep learning models to a mobile phone is limited by hardware and classification accuracy drops. To address this issue, we proposed this pipeline to find an optimized model for mobile phones. Finally, we discuss additional applications and computational concerns related to deploying deep-learning models on phones, including real-time analysis and image preprocessing. 
We believe the associated documentation and code can help physicians and medical experts develop medical image classification applications for distribution. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.00105v1-abstract-full').style.display = 'none'; document.getElementById('2206.00105v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> 9th International Conference on Artificial Intelligence and Applications (AIAPP 2022) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.12633">arXiv:2205.12633</a> <span> [<a href="https://arxiv.org/pdf/2205.12633">pdf</a>, <a href="https://arxiv.org/format/2205.12633">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> NTIRE 2022 Challenge on High Dynamic Range Imaging: Methods and Results </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=P%C3%A9rez-Pellitero%2C+E">Eduardo Pérez-Pellitero</a>, <a href="/search/eess?searchtype=author&query=Catley-Chandar%2C+S">Sibi Catley-Chandar</a>, <a href="/search/eess?searchtype=author&query=Shaw%2C+R">Richard Shaw</a>, 
<a href="/search/eess?searchtype=author&query=Leonardis%2C+A">Aleš Leonardis</a>, <a href="/search/eess?searchtype=author&query=Timofte%2C+R">Radu Timofte</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Z">Zexin Zhang</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+C">Cen Liu</a>, <a href="/search/eess?searchtype=author&query=Peng%2C+Y">Yunbo Peng</a>, <a href="/search/eess?searchtype=author&query=Lin%2C+Y">Yue Lin</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+G">Gaocheng Yu</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+J">Jin Zhang</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+Z">Zhe Ma</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+H">Hongbin Wang</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+X">Xiangyu Chen</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+X">Xintao Wang</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+H">Haiwei Wu</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+L">Lin Liu</a>, <a href="/search/eess?searchtype=author&query=Dong%2C+C">Chao Dong</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+J">Jiantao Zhou</a>, <a href="/search/eess?searchtype=author&query=Yan%2C+Q">Qingsen Yan</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+S">Song Zhang</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Weiye Chen</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Y">Yuhang Liu</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Z">Zhen Zhang</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Y">Yanning Zhang</a> , et al. 
(68 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.12633v1-abstract-short" style="display: inline;"> This paper reviews the challenge on constrained high dynamic range (HDR) imaging that was part of the New Trends in Image Restoration and Enhancement (NTIRE) workshop, held in conjunction with CVPR 2022. This manuscript focuses on the competition set-up, datasets, the proposed methods and their results. The challenge aims at estimating an HDR image from multiple respective low dynamic range (LDR)… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.12633v1-abstract-full').style.display = 'inline'; document.getElementById('2205.12633v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.12633v1-abstract-full" style="display: none;"> This paper reviews the challenge on constrained high dynamic range (HDR) imaging that was part of the New Trends in Image Restoration and Enhancement (NTIRE) workshop, held in conjunction with CVPR 2022. This manuscript focuses on the competition set-up, datasets, the proposed methods and their results. The challenge aims at estimating an HDR image from multiple respective low dynamic range (LDR) observations, which might suffer from under- or over-exposed regions and different sources of noise. The challenge is composed of two tracks with an emphasis on fidelity and complexity constraints: In Track 1, participants are asked to optimize objective fidelity scores while imposing a low-complexity constraint (i.e. solutions can not exceed a given number of operations). In Track 2, participants are asked to minimize the complexity of their solutions while imposing a constraint on fidelity scores (i.e. 
solutions are required to obtain a higher fidelity score than the prescribed baseline). Both tracks use the same data and metrics: Fidelity is measured by means of PSNR with respect to a ground-truth HDR image (computed both directly and with a canonical tonemapping operation), while complexity metrics include the number of Multiply-Accumulate (MAC) operations and runtime (in seconds). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.12633v1-abstract-full').style.display = 'none'; document.getElementById('2205.12633v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">CVPR Workshops 2022. 
15 pages, 21 figures, 2 tables</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops, 2022 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.13731">arXiv:2204.13731</a> <span> [<a href="https://arxiv.org/pdf/2204.13731">pdf</a>, <a href="https://arxiv.org/format/2204.13731">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Geophysics">physics.geo-ph</span> </div> </div> <p class="title is-5 mathjax"> An Intriguing Property of Geophysics Inversion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Feng%2C+Y">Yinan Feng</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yinpeng Chen</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shihang Feng</a>, <a href="/search/eess?searchtype=author&query=Jin%2C+P">Peng Jin</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Z">Zicheng Liu</a>, <a href="/search/eess?searchtype=author&query=Lin%2C+Y">Youzuo Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.13731v2-abstract-short" style="display: inline;"> Inversion techniques are widely used to reconstruct subsurface physical properties (e.g., velocity, conductivity) from surface-based geophysical measurements (e.g., seismic, electric/magnetic (EM) data). 
The problems are governed by partial differential equations (PDEs) like the wave or Maxwell's equations. Solving geophysical inversion problems is challenging due to the ill-posedness and high com… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.13731v2-abstract-full').style.display = 'inline'; document.getElementById('2204.13731v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.13731v2-abstract-full" style="display: none;"> Inversion techniques are widely used to reconstruct subsurface physical properties (e.g., velocity, conductivity) from surface-based geophysical measurements (e.g., seismic, electric/magnetic (EM) data). The problems are governed by partial differential equations (PDEs) like the wave or Maxwell's equations. Solving geophysical inversion problems is challenging due to the ill-posedness and high computational cost. To alleviate those issues, recent studies leverage deep neural networks to learn the inversion mappings from measurements to the property directly. In this paper, we show that such a mapping can be well modeled by a very shallow (but not wide) network with only five layers. This is achieved based on our new finding of an intriguing property: a near-linear relationship between the input and output, after applying integral transform in high dimensional space. In particular, when dealing with the inversion from seismic data to subsurface velocity governed by a wave equation, the integral results of velocity with Gaussian kernels are linearly correlated to the integral of seismic data with sine kernels. Furthermore, this property can be easily turned into a light-weight encoder-decoder network for inversion. The encoder contains the integration of seismic data and the linear transformation without need for fine-tuning. The decoder only consists of a single transformer block to reverse the integral of velocity. 
Experiments show that this interesting property holds for two geophysics inversion problems over four different datasets. Compared to much deeper InversionNet, our method achieves comparable accuracy, but consumes significantly fewer parameters. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.13731v2-abstract-full').style.display = 'none'; document.getElementById('2204.13731v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.03178">arXiv:2204.03178</a> <span> [<a href="https://arxiv.org/pdf/2204.03178">pdf</a>, <a href="https://arxiv.org/format/2204.03178">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> 3M: Multi-loss, Multi-path and Multi-level Neural Networks for speech recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=You%2C+Z">Zhao You</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shulin Feng</a>, <a href="/search/eess?searchtype=author&query=Su%2C+D">Dan Su</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+D">Dong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.03178v2-abstract-short" style="display: inline;"> Recently, Conformer based CTC/AED model has become a mainstream architecture for ASR. In this paper, based on our prior work, we identify and integrate several approaches to achieve further improvements for ASR tasks, which we denote as multi-loss, multi-path and multi-level, summarized as "3M" model. Specifically, multi-loss refers to the joint CTC/AED loss and multi-path denotes the Mixture-of-E… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.03178v2-abstract-full').style.display = 'inline'; document.getElementById('2204.03178v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.03178v2-abstract-full" style="display: none;"> Recently, Conformer based CTC/AED model has become a mainstream architecture for ASR. In this paper, based on our prior work, we identify and integrate several approaches to achieve further improvements for ASR tasks, which we denote as multi-loss, multi-path and multi-level, summarized as "3M" model. Specifically, multi-loss refers to the joint CTC/AED loss and multi-path denotes the Mixture-of-Experts(MoE) architecture which can effectively increase the model capacity without remarkably increasing computation cost. Multi-level means that we introduce auxiliary loss at multiple level of a deep model to help training. We evaluate our proposed method on the public WenetSpeech dataset and experimental results show that the proposed method provides 12.2%-17.6% relative CER improvement over the baseline model trained by Wenet toolkit. On our large scale dataset of 150k hours corpus, the 3M model has also shown obvious superiority over the baseline Conformer model. Code is publicly available at https://github.com/tencent-ailab/3m-asr. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.03178v2-abstract-full').style.display = 'none'; document.getElementById('2204.03178v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 1 figure. Submitted to INTERSPEECH 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.03969">arXiv:2203.03969</a> <span> [<a href="https://arxiv.org/pdf/2203.03969">pdf</a>, <a href="https://arxiv.org/format/2203.03969">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> A Dynamic Hierarchical Framework for IoT-assisted Metaverse Synchronization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Han%2C+Y">Yue Han</a>, <a href="/search/eess?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/eess?searchtype=author&query=Leung%2C+C">Cyril Leung</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+D+I">Dong In Kim</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+K">Kun Zhu</a>, <a 
href="/search/eess?searchtype=author&query=Feng%2C+S">Shaohan Feng</a>, <a href="/search/eess?searchtype=author&query=Shen%2C+S+X">Sherman Xuemin Shen</a>, <a href="/search/eess?searchtype=author&query=Miao%2C+C">Chunyan Miao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.03969v3-abstract-short" style="display: inline;"> Metaverse has recently attracted much attention from both academia and industry. Virtual services, ranging from virtual driver training to online route optimization for smart goods delivery, are emerging in the Metaverse. To make the human experience of virtual life more real, digital twins (DTs), namely digital replicas of physical objects, are key enablers. However, DT status may not always accu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.03969v3-abstract-full').style.display = 'inline'; document.getElementById('2203.03969v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.03969v3-abstract-full" style="display: none;"> Metaverse has recently attracted much attention from both academia and industry. Virtual services, ranging from virtual driver training to online route optimization for smart goods delivery, are emerging in the Metaverse. To make the human experience of virtual life more real, digital twins (DTs), namely digital replicas of physical objects, are key enablers. However, DT status may not always accurately reflect that of its real-world twin because the latter may be subject to changes with time. As such, it is necessary to synchronize a DT with its physical counterpart to ensure that its status is accurate for virtual businesses in the Metaverse. 
In this paper, we propose a dynamic hierarchical framework in which a group of IoT devices is incentivized to sense and collect physical objects' status information collectively so as to assists virtual service providers (VSPs) in synchronizing DTs. Based on the collected sensing data and the value decay rate of the DTs, the VSPs can determine synchronization intensities to maximize their payoffs. In our proposed dynamic hierarchical framework, the lower-level evolutionary game captures the VSPs selection by the IoT device population, and the upper-level differential game captures the VSPs payoffs, which are affected by the synchronization strategy, IoT devices selections, and the DTs value status, given VSPs are simultaneous decision makers. We further consider the case in which some VSPs are first movers and extend it as a Stackelberg differential game. We theoretically and experimentally show that the equilibrium to the lower-level game exists and is evolutionarily robust, and provide a sensitivity analysis with respect to various system parameters. Experiments show that the proposed dynamic hierarchical game outperform the baseline. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.03969v3-abstract-full').style.display = 'none'; document.getElementById('2203.03969v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.11880">arXiv:2202.11880</a> <span> [<a href="https://arxiv.org/pdf/2202.11880">pdf</a>, <a href="https://arxiv.org/format/2202.11880">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> On Nash-Stackelberg-Nash Games under Decision-Dependent Uncertainties: Model and Equilibrium </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhang%2C+Y">Yunfan Zhang</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+F">Feng Liu</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Z">Zhaojian Wang</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yue Chen</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shuanglei Feng</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Q">Qiuwei Wu</a>, <a href="/search/eess?searchtype=author&query=Hou%2C+Y">Yunhe Hou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.11880v1-abstract-short" style="display: inline;"> In this paper, we discuss a class of two-stage hierarchical games with multiple leaders and followers, which is called Nash-Stackelberg-Nash (N-S-N) games. Particularly, we consider N-S-N games under decision-dependent uncertainties (DDUs). DDUs refer to the uncertainties that are affected by the strategies of decision-makers and have been rarely addressed in game equilibrium analysis. 
In this pap… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.11880v1-abstract-full').style.display = 'inline'; document.getElementById('2202.11880v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.11880v1-abstract-full" style="display: none;"> In this paper, we discuss a class of two-stage hierarchical games with multiple leaders and followers, which is called Nash-Stackelberg-Nash (N-S-N) games. Particularly, we consider N-S-N games under decision-dependent uncertainties (DDUs). DDUs refer to the uncertainties that are affected by the strategies of decision-makers and have been rarely addressed in game equilibrium analysis. In this paper, we first formulate the N-S-N games with DDUs of complete ignorance, where the interactions between the players and DDUs are characterized by uncertainty sets that depend parametrically on the players' strategies. Then, a rigorous definition for the equilibrium of the game is established by consolidating generalized Nash equilibrium and Pareto-Nash equilibrium. Afterward, we prove the existence of the equilibrium of N-S-N games under DDUs by applying Kakutani's fixed-point theorem. Finally, an illustrative example is provided to show the impact of DDUs on the equilibrium of N-S-N games. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.11880v1-abstract-full').style.display = 'none'; document.getElementById('2202.11880v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.04542">arXiv:2202.04542</a> <span> [<a href="https://arxiv.org/pdf/2202.04542">pdf</a>, <a href="https://arxiv.org/format/2202.04542">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Spectrally Adaptive Common Spatial Patterns </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Mousavi%2C+M">Mahta Mousavi</a>, <a href="/search/eess?searchtype=author&query=Lybrand%2C+E">Eric Lybrand</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shuangquan Feng</a>, <a href="/search/eess?searchtype=author&query=Tang%2C+S">Shuai Tang</a>, <a href="/search/eess?searchtype=author&query=Saab%2C+R">Rayan Saab</a>, <a href="/search/eess?searchtype=author&query=de+Sa%2C+V">Virginia de Sa</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.04542v1-abstract-short" style="display: inline;"> The method of Common Spatial Patterns (CSP) is widely used for feature extraction of electroencephalography (EEG) data, such as in motor imagery brain-computer interface (BCI) systems. It is a data-driven method estimating a set of spatial filters so that the power of the filtered EEG signal is maximized for one motor imagery class and minimized for the other. 
This method, however, is prone to ove… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.04542v1-abstract-full').style.display = 'inline'; document.getElementById('2202.04542v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.04542v1-abstract-full" style="display: none;"> The method of Common Spatial Patterns (CSP) is widely used for feature extraction of electroencephalography (EEG) data, such as in motor imagery brain-computer interface (BCI) systems. It is a data-driven method estimating a set of spatial filters so that the power of the filtered EEG signal is maximized for one motor imagery class and minimized for the other. This method, however, is prone to overfitting and is known to suffer from poor generalization especially with limited calibration data. Additionally, due to the high heterogeneity in brain data and the non-stationarity of brain activity, CSP is usually trained for each user separately resulting in long calibration sessions or frequent re-calibrations that are tiring for the user. In this work, we propose a novel algorithm called Spectrally Adaptive Common Spatial Patterns (SACSP) that improves CSP by learning a temporal/spectral filter for each spatial filter so that the spatial filters are concentrated on the most relevant temporal frequencies for each user. We show the efficacy of SACSP in providing better generalizability and higher classification accuracy from calibration to online control compared to existing methods. Furthermore, we show that SACSP provides neurophysiologically relevant information about the temporal frequencies of the filtered signals. Our results highlight the differences in the motor imagery signal among BCI users as well as spectral differences in the signals generated for each class, and show the importance of learning robust user-specific features in a data-driven manner. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.04542v1-abstract-full').style.display = 'none'; document.getElementById('2202.04542v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2201.11207">arXiv:2201.11207</a> <span> [<a href="https://arxiv.org/pdf/2201.11207">pdf</a>, <a href="https://arxiv.org/format/2201.11207">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Discovering Phonetic Inventories with Crosslingual Automatic Speech Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=%C5%BBelasko%2C+P">Piotr Żelasko</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Siyuan Feng</a>, <a href="/search/eess?searchtype=author&query=Velazquez%2C+L+M">Laureano Moro Velazquez</a>, <a href="/search/eess?searchtype=author&query=Abavisani%2C+A">Ali Abavisani</a>, <a href="/search/eess?searchtype=author&query=Bhati%2C+S">Saurabhchand Bhati</a>, <a href="/search/eess?searchtype=author&query=Scharenborg%2C+O">Odette Scharenborg</a>, <a href="/search/eess?searchtype=author&query=Hasegawa-Johnson%2C+M">Mark Hasegawa-Johnson</a>, <a href="/search/eess?searchtype=author&query=Dehak%2C+N">Najim Dehak</a> </p> <p class="abstract 
mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2201.11207v2-abstract-short" style="display: inline;"> The high cost of data acquisition makes Automatic Speech Recognition (ASR) model training problematic for most existing languages, including languages that do not even have a written script, or for which the phone inventories remain unknown. Past works explored multilingual training, transfer learning, as well as zero-shot learning in order to build ASR systems for these low-resource languages. Wh… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.11207v2-abstract-full').style.display = 'inline'; document.getElementById('2201.11207v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2201.11207v2-abstract-full" style="display: none;"> The high cost of data acquisition makes Automatic Speech Recognition (ASR) model training problematic for most existing languages, including languages that do not even have a written script, or for which the phone inventories remain unknown. Past works explored multilingual training, transfer learning, as well as zero-shot learning in order to build ASR systems for these low-resource languages. While it has been shown that the pooling of resources from multiple languages is helpful, we have not yet seen a successful application of an ASR model to a language unseen during training. A crucial step in the adaptation of ASR from seen to unseen languages is the creation of the phone inventory of the unseen language. The ultimate goal of our work is to build the phone inventory of a language unseen during training in an unsupervised way without any knowledge about the language. 
In this paper, we 1) investigate the influence of different factors (i.e., model architecture, phonotactic model, type of speech representation) on phone recognition in an unknown language; 2) provide an analysis of which phones transfer well across languages and which do not in order to understand the limitations of and areas for further improvement for automatic phone inventory creation; and 3) present different methods to build a phone inventory of an unseen language in an unsupervised way. To that end, we conducted mono-, multi-, and crosslingual experiments on a set of 13 phonetically diverse languages and several in-depth analyses. We found a number of universal phone tokens (IPA symbols) that are well-recognized cross-linguistically. Through a detailed analysis of results, we conclude that unique sounds, similar sounds, and tone languages remain a major challenge for phonetic inventory discovery. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.11207v2-abstract-full').style.display = 'none'; document.getElementById('2201.11207v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 January, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 January, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for publication in Computer Speech and Language</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2201.04908">arXiv:2201.04908</a> <span> [<a href="https://arxiv.org/pdf/2201.04908">pdf</a>, <a href="https://arxiv.org/ps/2201.04908">ps</a>, <a href="https://arxiv.org/format/2201.04908">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> The Effectiveness of Time Stretching for Enhancing Dysarthric Speech for Improved Dysarthric Speech Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Prananta%2C+L">Luke Prananta</a>, <a href="/search/eess?searchtype=author&query=Halpern%2C+B+M">Bence Mark Halpern</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Siyuan Feng</a>, <a href="/search/eess?searchtype=author&query=Scharenborg%2C+O">Odette Scharenborg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2201.04908v1-abstract-short" style="display: inline;"> In this paper, we investigate several existing and a new state-of-the-art generative adversarial network-based (GAN) voice conversion method for enhancing dysarthric speech for improved dysarthric speech recognition. 
We compare key components of existing methods as part of a rigorous ablation study to find the most effective solution to improve dysarthric speech recognition. We find that straightf… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.04908v1-abstract-full').style.display = 'inline'; document.getElementById('2201.04908v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2201.04908v1-abstract-full" style="display: none;"> In this paper, we investigate several existing and a new state-of-the-art generative adversarial network-based (GAN) voice conversion method for enhancing dysarthric speech for improved dysarthric speech recognition. We compare key components of existing methods as part of a rigorous ablation study to find the most effective solution to improve dysarthric speech recognition. We find that straightforward signal processing methods such as stationary noise removal and vocoder-based time stretching lead to dysarthric speech recognition results comparable to those obtained when using state-of-the-art GAN-based voice conversion methods as measured using a phoneme recognition task. Additionally, our proposed solution of a combination of MaskCycleGAN-VC and time stretched enhancement is able to improve the phoneme recognition results for certain dysarthric speakers compared to our time stretched baseline. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.04908v1-abstract-full').style.display = 'none'; document.getElementById('2201.04908v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 January, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Extended version of paper to be submitted to Interspeech 2022. 6 pages, 2 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2111.11831">arXiv:2111.11831</a> <span> [<a href="https://arxiv.org/pdf/2111.11831">pdf</a>, <a href="https://arxiv.org/format/2111.11831">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> SpeechMoE2: Mixture-of-Experts Model with Improved Routing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=You%2C+Z">Zhao You</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+S">Shulin Feng</a>, <a href="/search/eess?searchtype=author&query=Su%2C+D">Dan Su</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+D">Dong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2111.11831v1-abstract-short" style="display: inline;"> Mixture-of-experts based acoustic models with dynamic routing mechanisms have proved promising results for speech recognition. The design principle of router architecture is important for the large model capacity and high computational efficiency. Our previous work SpeechMoE only uses local grapheme embedding to help routers to make route decisions. 
To further improve speech recognition performanc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.11831v1-abstract-full').style.display = 'inline'; document.getElementById('2111.11831v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2111.11831v1-abstract-full" style="display: none;"> Mixture-of-experts based acoustic models with dynamic routing mechanisms have proved promising results for speech recognition. The design principle of router architecture is important for the large model capacity and high computational efficiency. Our previous work SpeechMoE only uses local grapheme embedding to help routers to make route decisions. To further improve speech recognition performance against varying domains and accents, we propose a new router architecture which integrates additional global domain and accent embedding into router input to promote adaptability. Experimental results show that the proposed SpeechMoE2 can achieve lower character error rate (CER) with comparable parameters than SpeechMoE on both multi-domain and multi-accent task. Primarily, the proposed method provides up to 1.6% - 4.8% relative CER improvement for the multidomain task and 1.9% - 17.7% relative CER improvement for the multi-accent task respectively. Besides, increasing the number of experts also achieves consistent performance improvement and keeps the computational cost constant. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.11831v1-abstract-full').style.display = 'none'; document.getElementById('2111.11831v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2021. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 1 figure. Submitted to ICASSP 2022</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Feng%2C+S&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Feng%2C+S&start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Feng%2C+S&start=50" class="pagination-link " aria-label="Goto page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Feng%2C+S&start=100" class="pagination-link " aria-label="Goto page 3">3 </a> </li> </ul> </nav> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 
173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 
5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository