CINXE.COM

Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 68 results for author: <span class="mathjax">Guo, W</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/eess" aria-role="search"> Searching in archive <strong>eess</strong>. <a href="/search/?searchtype=author&amp;query=Guo%2C+W">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Guo, W"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Guo%2C+W&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Guo, W"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Guo%2C+W&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Guo%2C+W&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Guo%2C+W&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.15447">arXiv:2411.15447</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.15447">pdf</a>, <a href="https://arxiv.org/format/2411.15447">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Gotta Hear Them All: Sound Source Aware Vision to Audio Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Wei Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+H">Heng Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Ma%2C+J">Jianbo Ma</a>, <a href="/search/eess?searchtype=author&amp;query=Cai%2C+W">Weidong Cai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.15447v2-abstract-short" style="display: inline;"> Vision-to-audio (V2A) synthesis has broad applications in multimedia. Recent advancements of V2A methods have made it possible to generate relevant audios from inputs of videos or still images. However, the immersiveness and expressiveness of the generation are limited. One possible problem is that existing methods solely rely on the global scene and overlook details of local sounding objects (i.e&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15447v2-abstract-full').style.display = 'inline'; document.getElementById('2411.15447v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.15447v2-abstract-full" style="display: none;"> Vision-to-audio (V2A) synthesis has broad applications in multimedia. Recent advancements of V2A methods have made it possible to generate relevant audios from inputs of videos or still images. However, the immersiveness and expressiveness of the generation are limited. One possible problem is that existing methods solely rely on the global scene and overlook details of local sounding objects (i.e., sound sources). To address this issue, we propose a Sound Source-Aware V2A (SSV2A) generator. SSV2A is able to locally perceive multimodal sound sources from a scene with visual detection and cross-modality translation. It then contrastively learns a Cross-Modal Sound Source (CMSS) Manifold to semantically disambiguate each source. Finally, we attentively mix their CMSS semantics into a rich audio representation, from which a pretrained audio generator outputs the sound. To model the CMSS manifold, we curate a novel single-sound-source visual-audio dataset VGGS3 from VGGSound. We also design a Sound Source Matching Score to measure localized audio relevance. This is to our knowledge the first work to address V2A generation at the sound-source level. Extensive experiments show that SSV2A surpasses state-of-the-art methods in both generation fidelity and relevance. We further demonstrate SSV2A&#39;s ability to achieve intuitive V2A control by compositing vision, text, and audio conditions. Our SSV2A generation can be tried and heard at https://ssv2a.github.io/SSV2A-demo . <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15447v2-abstract-full').style.display = 'none'; document.getElementById('2411.15447v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 9 figures, source code released at https://github.com/wguo86/SSV2A</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18610">arXiv:2410.18610</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.18610">pdf</a>, <a href="https://arxiv.org/format/2410.18610">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> A Joint Representation Using Continuous and Discrete Features for Cardiovascular Diseases Risk Prediction on Chest CT Scans </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xu%2C+M">Minfeng Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Fan%2C+C">Chen-Chen Fan</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+Y">Yan-Jie Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Wenchao Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+P">Pan Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Qi%2C+J">Jing Qi</a>, <a href="/search/eess?searchtype=author&amp;query=Lu%2C+L">Le Lu</a>, <a href="/search/eess?searchtype=author&amp;query=Chao%2C+H">Hanqing Chao</a>, <a href="/search/eess?searchtype=author&amp;query=He%2C+K">Kunlun He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18610v2-abstract-short" style="display: inline;"> Cardiovascular diseases (CVD) remain a leading health concern and contribute significantly to global mortality rates. While clinical advancements have led to a decline in CVD mortality, accurately identifying individuals who could benefit from preventive interventions remains an unsolved challenge in preventive cardiology. Current CVD risk prediction models, recommended by guidelines, are based on&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18610v2-abstract-full').style.display = 'inline'; document.getElementById('2410.18610v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18610v2-abstract-full" style="display: none;"> Cardiovascular diseases (CVD) remain a leading health concern and contribute significantly to global mortality rates. While clinical advancements have led to a decline in CVD mortality, accurately identifying individuals who could benefit from preventive interventions remains an unsolved challenge in preventive cardiology. Current CVD risk prediction models, recommended by guidelines, are based on limited traditional risk factors or use CT imaging to acquire quantitative biomarkers, and still have limitations in predictive accuracy and applicability. On the other hand, end-to-end trained CVD risk prediction methods leveraging deep learning on CT images often fail to provide transparent and explainable decision grounds for assisting physicians. In this work, we proposed a novel joint representation that integrates discrete quantitative biomarkers and continuous deep features extracted from chest CT scans. Our approach initiated with a deep CVD risk classification model by capturing comprehensive continuous deep learning features while jointly obtaining currently clinical-established quantitative biomarkers via segmentation models. In the feature joint representation stage, we use an instance-wise feature-gated mechanism to align the continuous and discrete features, followed by a soft instance-wise feature interaction mechanism fostering independent and effective feature interaction for the final CVD risk prediction. Our method substantially improves CVD risk predictive performance and offers individual contribution analysis of each biomarker, which is important in assisting physicians&#39; decision-making processes. We validated our method on a public chest low-dose CT dataset and a private external chest standard-dose CT patient cohort of 17,207 CT volumes from 6,393 unique subjects, and demonstrated superior predictive performance, achieving AUCs of 0.875 and 0.843, respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18610v2-abstract-full').style.display = 'none'; document.getElementById('2410.18610v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">23 pages, 9 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.13832">arXiv:2409.13832</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.13832">pdf</a>, <a href="https://arxiv.org/format/2409.13832">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> GTSinger: A Global Multi-Technique Singing Corpus with Realistic Music Scores for All Singing Tasks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Y">Yu Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Pan%2C+C">Changhao Pan</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Wenxiang Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+R">Ruiqi Li</a>, <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+Z">Zhiyuan Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+J">Jialei Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+W">Wenhao Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Lu%2C+J">Jingyu Lu</a>, <a href="/search/eess?searchtype=author&amp;query=Hong%2C+Z">Zhiqing Hong</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+C">Chuxin Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+L">LiChao Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=He%2C+J">Jinzheng He</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+Z">Ziyue Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+Y">Yuxin Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+C">Chen Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+J">Jiecheng Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Cheng%2C+X">Xinyu Cheng</a>, <a href="/search/eess?searchtype=author&amp;query=Zhao%2C+Z">Zhou Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.13832v4-abstract-short" style="display: inline;"> The scarcity of high-quality and multi-task singing datasets significantly hinders the development of diverse controllable and personalized singing tasks, as existing singing datasets suffer from low quality, limited diversity of languages and singers, absence of multi-technique information and realistic music scores, and poor task suitability. To tackle these problems, we present GTSinger, a larg&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13832v4-abstract-full').style.display = 'inline'; document.getElementById('2409.13832v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.13832v4-abstract-full" style="display: none;"> The scarcity of high-quality and multi-task singing datasets significantly hinders the development of diverse controllable and personalized singing tasks, as existing singing datasets suffer from low quality, limited diversity of languages and singers, absence of multi-technique information and realistic music scores, and poor task suitability. To tackle these problems, we present GTSinger, a large global, multi-technique, free-to-use, high-quality singing corpus with realistic music scores, designed for all singing tasks, along with its benchmarks. Particularly, (1) we collect 80.59 hours of high-quality singing voices, forming the largest recorded singing dataset; (2) 20 professional singers across nine widely spoken languages offer diverse timbres and styles; (3) we provide controlled comparison and phoneme-level annotations of six commonly used singing techniques, helping technique modeling and control; (4) GTSinger offers realistic music scores, assisting real-world musical composition; (5) singing voices are accompanied by manual phoneme-to-audio alignments, global style labels, and 16.16 hours of paired speech for various singing tasks. Moreover, to facilitate the use of GTSinger, we conduct four benchmark experiments: technique-controllable singing voice synthesis, technique recognition, style transfer, and speech-to-singing conversion. The corpus and demos can be found at http://gtsinger.github.io. We provide the dataset and the code for processing data and conducting benchmarks at https://huggingface.co/datasets/GTSinger/GTSinger and https://github.com/GTSinger/GTSinger. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13832v4-abstract-full').style.display = 'none'; document.getElementById('2409.13832v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by NeurIPS 2024 (Spotlight)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.01695">arXiv:2409.01695</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.01695">pdf</a>, <a href="https://arxiv.org/format/2409.01695">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> USTC-KXDIGIT System Description for ASVspoof5 Challenge </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Chen%2C+Y">Yihao Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+H">Haochen Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+N">Nan Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Xia%2C+X">Xiang Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Gu%2C+Q">Qing Gu</a>, <a href="/search/eess?searchtype=author&amp;query=Hao%2C+Y">Yunqi Hao</a>, <a href="/search/eess?searchtype=author&amp;query=Cai%2C+P">Pengfei Cai</a>, <a href="/search/eess?searchtype=author&amp;query=Guan%2C+Y">Yu Guan</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+J">Jialong Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Xie%2C+W">Weilin Xie</a>, <a href="/search/eess?searchtype=author&amp;query=Fang%2C+L">Lei Fang</a>, <a href="/search/eess?searchtype=author&amp;query=Fang%2C+S">Sian Fang</a>, <a href="/search/eess?searchtype=author&amp;query=Song%2C+Y">Yan Song</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Wu Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+L">Lin Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+M">Minqiang Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.01695v1-abstract-short" style="display: inline;"> This paper describes the USTC-KXDIGIT system submitted to the ASVspoof5 Challenge for Track 1 (speech deepfake detection) and Track 2 (spoofing-robust automatic speaker verification, SASV). Track 1 showcases a diverse range of technical qualities from potential processing algorithms and includes both open and closed conditions. For these conditions, our system consists of a cascade of a frontend f&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01695v1-abstract-full').style.display = 'inline'; document.getElementById('2409.01695v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.01695v1-abstract-full" style="display: none;"> This paper describes the USTC-KXDIGIT system submitted to the ASVspoof5 Challenge for Track 1 (speech deepfake detection) and Track 2 (spoofing-robust automatic speaker verification, SASV). Track 1 showcases a diverse range of technical qualities from potential processing algorithms and includes both open and closed conditions. For these conditions, our system consists of a cascade of a frontend feature extractor and a back-end classifier. We focus on extensive embedding engineering and enhancing the generalization of the back-end classifier model. Specifically, the embedding engineering is based on hand-crafted features and speech representations from a self-supervised model, used for closed and open conditions, respectively. To detect spoof attacks under various adversarial conditions, we trained multiple systems on an augmented training set. Additionally, we used voice conversion technology to synthesize fake audio from genuine audio in the training set to enrich the synthesis algorithms. To leverage the complementary information learned by different model architectures, we employed activation ensemble and fused scores from different systems to obtain the final decision score for spoof detection. During the evaluation phase, the proposed methods achieved 0.3948 minDCF and 14.33% EER in the close condition, and 0.0750 minDCF and 2.59% EER in the open condition, demonstrating the robustness of our submitted systems under adversarial conditions. In Track 2, we continued using the CM system from Track 1 and fused it with a CNN-based ASV system. This approach achieved 0.2814 min-aDCF in the closed condition and 0.0756 min-aDCF in the open condition, showcasing superior performance in the SASV system. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01695v1-abstract-full').style.display = 'none'; document.getElementById('2409.01695v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ASVspoof5 workshop paper</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.16732">arXiv:2408.16732</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.16732">pdf</a>, <a href="https://arxiv.org/format/2408.16732">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Automatic detection of Mild Cognitive Impairment using high-dimensional acoustic features in spontaneous speech </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+C">Cong Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Wenxing Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Dai%2C+H">Hongsheng Dai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.16732v1-abstract-short" style="display: inline;"> This study addresses the TAUKADIAL challenge, focusing on the classification of speech from people with Mild Cognitive Impairment (MCI) and neurotypical controls. We conducted three experiments comparing five machine-learning methods: Random Forests, Sparse Logistic Regression, k-Nearest Neighbors, Sparse Support Vector Machine, and Decision Tree, utilizing 1076 acoustic features automatically ext&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.16732v1-abstract-full').style.display = 'inline'; document.getElementById('2408.16732v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.16732v1-abstract-full" style="display: none;"> This study addresses the TAUKADIAL challenge, focusing on the classification of speech from people with Mild Cognitive Impairment (MCI) and neurotypical controls. We conducted three experiments comparing five machine-learning methods: Random Forests, Sparse Logistic Regression, k-Nearest Neighbors, Sparse Support Vector Machine, and Decision Tree, utilizing 1076 acoustic features automatically extracted using openSMILE. In Experiment 1, the entire dataset was used to train a language-agnostic model. Experiment 2 introduced a language detection step, leading to separate model training for each language. Experiment 3 further enhanced the language-agnostic model from Experiment 1, with a specific focus on evaluating the robustness of the models using out-of-sample test data. Across all three experiments, results consistently favored models capable of handling high-dimensional data, such as Random Forest and Sparse Logistic Regression, in classifying speech from MCI and controls. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.16732v1-abstract-full').style.display = 'none'; document.getElementById('2408.16732v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.14977">arXiv:2408.14977</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.14977">pdf</a>, <a href="https://arxiv.org/format/2408.14977">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> LN-Gen: Rectal Lymph Nodes Generation via Anatomical Features </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Weidong Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+H">Hantao Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Wan%2C+S">Shouhong Wan</a>, <a href="/search/eess?searchtype=author&amp;query=Zou%2C+B">Bingbing Zou</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+W">Wanqin Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Jin%2C+P">Peiquan Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.14977v1-abstract-short" style="display: inline;"> Accurate segmentation of rectal lymph nodes is crucial for the staging and treatment planning of rectal cancer. However, the complexity of the surrounding anatomical structures and the scarcity of annotated data pose significant challenges. This study introduces a novel lymph node synthesis technique aimed at generating diverse and realistic synthetic rectal lymph node samples to mitigate the reli&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.14977v1-abstract-full').style.display = 'inline'; document.getElementById('2408.14977v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.14977v1-abstract-full" style="display: none;"> Accurate segmentation of rectal lymph nodes is crucial for the staging and treatment planning of rectal cancer. However, the complexity of the surrounding anatomical structures and the scarcity of annotated data pose significant challenges. This study introduces a novel lymph node synthesis technique aimed at generating diverse and realistic synthetic rectal lymph node samples to mitigate the reliance on manual annotation. Unlike direct diffusion methods, which often produce masks that are discontinuous and of suboptimal quality, our approach leverages an implicit SDF-based method for mask generation, ensuring the production of continuous, stable, and morphologically diverse masks. Experimental results demonstrate that our synthetic data significantly improves segmentation performance. Our work highlights the potential of diffusion model for accurately synthesizing structurally complex lesions, such as lymph nodes in rectal cancer, alleviating the challenge of limited annotated data in this field and aiding in advancements in rectal cancer diagnosis and treatment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.14977v1-abstract-full').style.display = 'none'; document.getElementById('2408.14977v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.15222">arXiv:2406.15222</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.15222">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Rapid and Accurate Diagnosis of Acute Aortic Syndrome using Non-contrast CT: A Large-scale, Retrospective, Multi-center and AI-based Study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Hu%2C+Y">Yujian Hu</a>, <a href="/search/eess?searchtype=author&amp;query=Xiang%2C+Y">Yilang Xiang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+Y">Yan-Jie Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=He%2C+Y">Yangyan He</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+S">Shifeng Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Du%2C+X">Xiaolong Du</a>, <a href="/search/eess?searchtype=author&amp;query=Den%2C+C">Chunlan Den</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+Y">Youyao Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+G">Gaofeng Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+Z">Zhengyao Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+J">Jingyong Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhao%2C+W">Wenjun Zhao</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+X">Xuejun Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+D">Donglin Li</a>, <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+Q">Qianqian Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+Z">Zhenjiang Li</a>, <a href="/search/eess?searchtype=author&amp;query=Qiu%2C+C">Chenyang Qiu</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+Z">Ziheng Wu</a>, <a href="/search/eess?searchtype=author&amp;query=He%2C+Y">Yunjun He</a>, <a href="/search/eess?searchtype=author&amp;query=Tian%2C+C">Chen Tian</a>, <a href="/search/eess?searchtype=author&amp;query=Qiu%2C+Y">Yihui Qiu</a>, <a href="/search/eess?searchtype=author&amp;query=Lin%2C+Z">Zuodong Lin</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+X">Xiaolong Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=He%2C+Y">Yuan He</a>, <a href="/search/eess?searchtype=author&amp;query=Yuan%2C+Z">Zhenpeng Yuan</a> , et al. (15 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.15222v3-abstract-short" style="display: inline;"> Chest pain symptoms are highly prevalent in emergency departments (EDs), where acute aortic syndrome (AAS) is a catastrophic cardiovascular emergency with a high fatality rate, especially when timely and accurate treatment is not administered. However, current triage practices in the ED can cause up to approximately half of patients with AAS to have an initially missed diagnosis or be misdiagnosed&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.15222v3-abstract-full').style.display = 'inline'; document.getElementById('2406.15222v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.15222v3-abstract-full" style="display: none;"> Chest pain symptoms are highly prevalent in emergency departments (EDs), where acute aortic syndrome (AAS) is a catastrophic cardiovascular emergency with a high fatality rate, especially when timely and accurate treatment is not administered. However, current triage practices in the ED can cause up to approximately half of patients with AAS to have an initially missed diagnosis or be misdiagnosed as having other acute chest pain conditions. Subsequently, these AAS patients will undergo clinically inaccurate or suboptimal differential diagnosis. Fortunately, even under these suboptimal protocols, nearly all these patients underwent non-contrast CT covering the aorta anatomy at the early stage of differential diagnosis. In this study, we developed an artificial intelligence model (DeepAAS) using non-contrast CT, which is highly accurate for identifying AAS and provides interpretable results to assist in clinical decision-making. Performance was assessed in two major phases: a multi-center retrospective study (n = 20,750) and an exploration in real-world emergency scenarios (n = 137,525). In the multi-center cohort, DeepAAS achieved a mean area under the receiver operating characteristic curve of 0.958 (95% CI 0.950-0.967). In the real-world cohort, DeepAAS detected 109 AAS patients with misguided initial suspicion, achieving 92.6% (95% CI 76.2%-97.5%) in mean sensitivity and 99.2% (95% CI 99.1%-99.3%) in mean specificity. Our AI model performed well on non-contrast CT at all applicable early stages of differential diagnosis workflows, effectively reduced the overall missed diagnosis and misdiagnosis rate from 48.8% to 4.8% and shortened the diagnosis time for patients with misguided initial suspicion from an average of 681.8 (74-11,820) mins to 68.5 (23-195) mins. DeepAAS could effectively fill the gap in the current clinical workflow without requiring additional tests. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.15222v3-abstract-full').style.display = 'none'; document.getElementById('2406.15222v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.01173">arXiv:2406.01173</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.01173">pdf</a>, <a href="https://arxiv.org/format/2406.01173">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Cascade Network Stability of Synchronized Traffic Load Balancing with Heterogeneous Energy Efficiency Policies </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zou%2C+M">Mengbang Zou</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Weisi Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.01173v1-abstract-short" style="display: inline;"> Cascade stability of load balancing is critical for ensuring high efficiency service delivery and preventing undesirable handovers. In energy efficient networks that employ diverse sleep mode operations, handing over traffic to neighbouring cells&#39; expanded coverage must be done with minimal side effects. Current research is largely concerned with designing distributed and centralized efficient loa&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.01173v1-abstract-full').style.display = 'inline'; document.getElementById('2406.01173v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.01173v1-abstract-full" style="display: none;"> Cascade stability of load balancing is critical for ensuring high efficiency service delivery and preventing undesirable handovers. In energy efficient networks that employ diverse sleep mode operations, handing over traffic to neighbouring cells&#39; expanded coverage must be done with minimal side effects. Current research is largely concerned with designing distributed and centralized efficient load balancing policies that are locally stable. There is a major research gap in identifying large-scale cascade stability for networks with heterogeneous load balancing policies arising from diverse plug-and-play sleep mode policies in ORAN, which will cause heterogeneity in the network stability behaviour. Here, we investigate whether cells arbitrarily connected for load balancing and having an arbitrary number undergoing sleep mode can: (i) synchronize to a desirable load-balancing state, and (ii) maintain stability. For the first time, we establish the criterion for stability and prove its validity for any general load dynamics and random network topology. Whilst its general form allows all load balancing and sleep mode dynamics to be incorporated, we propose an ORAN architecture where the network service management and orchestration (SMO) must monitor new load balancing policies to ensure overall network cascade stability. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.01173v1-abstract-full').style.display = 'none'; document.getElementById('2406.01173v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.00320">arXiv:2406.00320</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.00320">pdf</a>, <a href="https://arxiv.org/format/2406.00320">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Frieren: Efficient Video-to-Audio Generation Network with Rectified Flow Matching </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Y">Yongqi Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Wenxiang Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+R">Rongjie Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+J">Jiawei Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Z">Zehan Wang</a>, <a href="/search/eess?searchtype=author&amp;query=You%2C+F">Fuming You</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+R">Ruiqi Li</a>, <a href="/search/eess?searchtype=author&amp;query=Zhao%2C+Z">Zhou Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.00320v3-abstract-short" style="display: inline;"> Video-to-audio (V2A) generation aims to synthesize content-matching audio from silent video, and it remains challenging to build V2A models with high generation quality, efficiency, and visual-audio temporal synchrony. We propose Frieren, a V2A model based on rectified flow matching. Frieren regresses the conditional transport vector field from noise to spectrogram latent with straight paths and c&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.00320v3-abstract-full').style.display = 'inline'; document.getElementById('2406.00320v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.00320v3-abstract-full" style="display: none;"> Video-to-audio (V2A) generation aims to synthesize content-matching audio from silent video, and it remains challenging to build V2A models with high generation quality, efficiency, and visual-audio temporal synchrony. We propose Frieren, a V2A model based on rectified flow matching. Frieren regresses the conditional transport vector field from noise to spectrogram latent with straight paths and conducts sampling by solving ODE, outperforming autoregressive and score-based models in terms of audio quality. By employing a non-autoregressive vector field estimator based on a feed-forward transformer and channel-level cross-modal feature fusion with strong temporal alignment, our model generates audio that is highly synchronized with the input video. Furthermore, through reflow and one-step distillation with guided vector field, our model can generate decent audio in a few, or even only one sampling step. Experiments indicate that Frieren achieves state-of-the-art performance in both generation quality and temporal alignment on VGGSound, with alignment accuracy reaching 97.22%, and 6.2% improvement in inception score over the strong diffusion-based baseline. Audio samples are available at http://frieren-v2a.github.io. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.00320v3-abstract-full').style.display = 'none'; document.getElementById('2406.00320v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted by NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.14398">arXiv:2405.14398</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.14398">pdf</a>, <a href="https://arxiv.org/format/2405.14398">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> SpGesture: Source-Free Domain-adaptive sEMG-based Gesture Recognition with Jaccard Attentive Spiking Neural Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Weiyu Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+Y">Ying Sun</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+Y">Yijie Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Qiao%2C+Z">Ziyue Qiao</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+Y">Yongkui Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Xiong%2C+H">Hui Xiong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.14398v3-abstract-short" style="display: inline;"> Surface electromyography (sEMG) based gesture recognition offers a natural and intuitive interaction modality for wearable devices. Despite significant advancements in sEMG-based gesture-recognition models, existing methods often suffer from high computational latency and increased energy consumption. Additionally, the inherent instability of sEMG signals, combined with their sensitivity to distri&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.14398v3-abstract-full').style.display = 'inline'; document.getElementById('2405.14398v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.14398v3-abstract-full" style="display: none;"> Surface electromyography (sEMG) based gesture recognition offers a natural and intuitive interaction modality for wearable devices. Despite significant advancements in sEMG-based gesture-recognition models, existing methods often suffer from high computational latency and increased energy consumption. Additionally, the inherent instability of sEMG signals, combined with their sensitivity to distribution shifts in real-world settings, compromises model robustness. To tackle these challenges, we propose a novel SpGesture framework based on Spiking Neural Networks, which possesses several unique merits compared with existing methods: (1) Robustness: By utilizing membrane potential as a memory list, we pioneer the introduction of Source-Free Domain Adaptation into SNN for the first time. This enables SpGesture to mitigate the accuracy degradation caused by distribution shifts. (2) High Accuracy: With a novel Spiking Jaccard Attention, SpGesture enhances the SNNs&#39; ability to represent sEMG features, leading to a notable rise in system accuracy. To validate SpGesture&#39;s performance, we collected a new sEMG gesture dataset which has different forearm postures, where SpGesture achieved the highest accuracy among the baselines ($89.26\%$). Moreover, the actual deployment on the CPU demonstrated a system latency below 100ms, well within real-time requirements. This impressive performance showcases SpGesture&#39;s potential to enhance the applicability of sEMG in real-world scenarios. The code is available at https://github.com/guoweiyu/SpGesture/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.14398v3-abstract-full').style.display = 'none'; document.getElementById('2405.14398v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.09752">arXiv:2405.09752</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.09752">pdf</a>, <a href="https://arxiv.org/format/2405.09752">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> Time-Varying Graph Signal Recovery Using High-Order Smoothness and Adaptive Low-rankness </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Weihong Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Lou%2C+Y">Yifei Lou</a>, <a href="/search/eess?searchtype=author&amp;query=Qin%2C+J">Jing Qin</a>, <a href="/search/eess?searchtype=author&amp;query=Yan%2C+M">Ming Yan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.09752v1-abstract-short" style="display: inline;"> Time-varying graph signal recovery has been widely used in many applications, including climate change, environmental hazard monitoring, and epidemic studies. It is crucial to choose appropriate regularizations to describe the characteristics of the underlying signals, such as the smoothness of the signal over the graph domain and the low-rank structure of the spatial-temporal signal modeled in a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.09752v1-abstract-full').style.display = 'inline'; document.getElementById('2405.09752v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.09752v1-abstract-full" style="display: none;"> Time-varying graph signal recovery has been widely used in many applications, including climate change, environmental hazard monitoring, and epidemic studies. It is crucial to choose appropriate regularizations to describe the characteristics of the underlying signals, such as the smoothness of the signal over the graph domain and the low-rank structure of the spatial-temporal signal modeled in a matrix form. As one of the most popular options, the graph Laplacian is commonly adopted in designing graph regularizations for reconstructing signals defined on a graph from partially observed data. In this work, we propose a time-varying graph signal recovery method based on the high-order Sobolev smoothness and an error-function weighted nuclear norm regularization to enforce the low-rankness. Two efficient algorithms based on the alternating direction method of multipliers and iterative reweighting are proposed, and convergence of one algorithm is shown in detail. We conduct various numerical experiments on synthetic and real-world data sets to demonstrate the proposed method&#39;s effectiveness compared to the state-of-the-art in graph signal recovery. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.09752v1-abstract-full').style.display = 'none'; document.getElementById('2405.09752v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.04867">arXiv:2405.04867</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.04867">pdf</a>, <a href="https://arxiv.org/format/2405.04867">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MIPI 2024 Challenge on Demosaic for HybridEVS Camera: Methods and Results </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wu%2C+Y">Yaqi Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Fan%2C+Z">Zhihao Fan</a>, <a href="/search/eess?searchtype=author&amp;query=Chu%2C+X">Xiaofeng Chu</a>, <a href="/search/eess?searchtype=author&amp;query=Ren%2C+J+S">Jimmy S. Ren</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+X">Xiaoming Li</a>, <a href="/search/eess?searchtype=author&amp;query=Yue%2C+Z">Zongsheng Yue</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+C">Chongyi Li</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+S">Shangcheng Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Feng%2C+R">Ruicheng Feng</a>, <a href="/search/eess?searchtype=author&amp;query=Dai%2C+Y">Yuekun Dai</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+P">Peiqing Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Loy%2C+C+C">Chen Change Loy</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+S">Senyan Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+Z">Zhijing Sun</a>, <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+J">Jiaying Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+Y">Yurui Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Fu%2C+X">Xueyang Fu</a>, <a href="/search/eess?searchtype=author&amp;query=Zha%2C+Z">Zheng-Jun Zha</a>, <a href="/search/eess?searchtype=author&amp;query=Cao%2C+J">Jun Cao</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+C">Cheng Li</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+S">Shu Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Ma%2C+L">Liang Ma</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+S">Shiyang Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Zeng%2C+H">Haijin Zeng</a>, <a href="/search/eess?searchtype=author&amp;query=Feng%2C+K">Kai Feng</a> , et al. (24 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.04867v1-abstract-short" style="display: inline;"> The increasing demand for computational photography and imaging on mobile platforms has led to the widespread development and integration of advanced image sensors with novel algorithms in camera systems. However, the scarcity of high-quality data for research and the rare opportunity for in-depth exchange of views from industry and academia constrain the development of mobile intelligent photogra&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.04867v1-abstract-full').style.display = 'inline'; document.getElementById('2405.04867v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.04867v1-abstract-full" style="display: none;"> The increasing demand for computational photography and imaging on mobile platforms has led to the widespread development and integration of advanced image sensors with novel algorithms in camera systems. However, the scarcity of high-quality data for research and the rare opportunity for in-depth exchange of views from industry and academia constrain the development of mobile intelligent photography and imaging (MIPI). Building on the achievements of the previous MIPI Workshops held at ECCV 2022 and CVPR 2023, we introduce our third MIPI challenge including three tracks focusing on novel image sensors and imaging algorithms. In this paper, we summarize and review the Nighttime Flare Removal track on MIPI 2024. In total, 170 participants were successfully registered, and 14 teams submitted results in the final testing phase. The developed solutions in this challenge achieved state-of-the-art performance on Nighttime Flare Removal. More details of this challenge and the link to the dataset can be found at https://mipi-challenge.org/MIPI2024/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.04867v1-abstract-full').style.display = 'none'; document.getElementById('2405.04867v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">MIPI@CVPR2024. Website: https://mipi-challenge.org/MIPI2024/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.11213">arXiv:2404.11213</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.11213">pdf</a>, <a href="https://arxiv.org/format/2404.11213">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Revisiting Noise Resilience Strategies in Gesture Recognition: Short-Term Enhancement in Surface Electromyographic Signal Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Weiyu Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Qiao%2C+Z">Ziyue Qiao</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+Y">Ying Sun</a>, <a href="/search/eess?searchtype=author&amp;query=Xiong%2C+H">Hui Xiong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.11213v1-abstract-short" style="display: inline;"> Gesture recognition based on surface electromyography (sEMG) has been gaining importance in many 3D Interactive Scenes. However, sEMG is easily influenced by various forms of noise in real-world environments, leading to challenges in providing long-term stable interactions through sEMG. Existing methods often struggle to enhance model noise resilience through various predefined data augmentation t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.11213v1-abstract-full').style.display = 'inline'; document.getElementById('2404.11213v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.11213v1-abstract-full" style="display: none;"> Gesture recognition based on surface electromyography (sEMG) has been gaining importance in many 3D Interactive Scenes. However, sEMG is easily influenced by various forms of noise in real-world environments, leading to challenges in providing long-term stable interactions through sEMG. Existing methods often struggle to enhance model noise resilience through various predefined data augmentation techniques. In this work, we revisit the problem from a short term enhancement perspective to improve precision and robustness against various common noisy scenarios with learnable denoise using sEMG intrinsic pattern information and sliding-window attention. We propose a Short Term Enhancement Module(STEM) which can be easily integrated with various models. STEM offers several benefits: 1) Learnable denoise, enabling noise reduction without manual data augmentation; 2) Scalability, adaptable to various models; and 3) Cost-effectiveness, achieving short-term enhancement through minimal weight-sharing in an efficient attention mechanism. In particular, we incorporate STEM into a transformer, creating the Short Term Enhanced Transformer (STET). Compared with best-competing approaches, the impact of noise on STET is reduced by more than 20%. We also report promising results on both classification and regression datasets and demonstrate that STEM generalizes across different gesture recognition tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.11213v1-abstract-full').style.display = 'none'; document.getElementById('2404.11213v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.09131">arXiv:2404.09131</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.09131">pdf</a>, <a href="https://arxiv.org/format/2404.09131">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Design of Artificial Interference Signals for Covert Communication Aided by Multiple Friendly Nodes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Guo%2C+X+Z+W">Xuyang Zhao. Wei Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Y">Yongchao Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.09131v3-abstract-short" style="display: inline;"> In this paper, we consider a scenario of covert communication aided by multiple friendly interference nodes. The objective is to conceal the legitimate communication link under the surveillance of a warden. The main content is as follows: first, we propose a novel strategy for generating artificial noise signals in the considered covert scenario. Then, we leverage the statistical information of ch&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.09131v3-abstract-full').style.display = 'inline'; document.getElementById('2404.09131v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.09131v3-abstract-full" style="display: none;"> In this paper, we consider a scenario of covert communication aided by multiple friendly interference nodes. The objective is to conceal the legitimate communication link under the surveillance of a warden. The main content is as follows: first, we propose a novel strategy for generating artificial noise signals in the considered covert scenario. Then, we leverage the statistical information of channel coefficients to optimize the basis matrix of the artificial noise signals space in the absence of accurate channel fading information between the friendly interference nodes and the legitimate receiver. The optimization problem aims to design artificial noise signals within the space to facilitate covert communication while minimizing the impact on the performance of legitimate communication. Second, a customized Rimannian Stochastic Variance Reduced Gradient (R-SVRG) algorithm is proposed to solve the non-convex problem. In the algorithm, we employ the Riemannian optimization framework to analyze the geometric structure of the basis matrix constraints and transform the original non-convex optimization problem into an unconstrained problem on the complex Stiefel manifold for solution. Third, we theoretically prove the convergence of the proposed algorithm to a stationary point. In the end, we evaluate the performance of the proposed strategy for generating artificial noise signals through numerical simulations. The results demonstrate that our approach significantly outperforms the Gaussian artificial noise strategy without optimization. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.09131v3-abstract-full').style.display = 'none'; document.getElementById('2404.09131v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.02731">arXiv:2404.02731</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.02731">pdf</a>, <a href="https://arxiv.org/format/2404.02731">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> </div> <p class="title is-5 mathjax"> Event Camera Demosaicing via Swin Transformer and Pixel-focus Loss </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Lu%2C+Y">Yunfan Lu</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+Y">Yijie Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Ma%2C+W">Wenzong Ma</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Weiyu Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Xiong%2C+H">Hui Xiong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.02731v1-abstract-short" style="display: inline;"> Recent research has highlighted improvements in high-quality imaging guided by event cameras, with most of these efforts concentrating on the RGB domain. However, these advancements frequently neglect the unique challenges introduced by the inherent flaws in the sensor design of event cameras in the RAW domain. Specifically, this sensor design results in the partial loss of pixel values, posing ne&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.02731v1-abstract-full').style.display = 'inline'; document.getElementById('2404.02731v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.02731v1-abstract-full" style="display: none;"> Recent research has highlighted improvements in high-quality imaging guided by event cameras, with most of these efforts concentrating on the RGB domain. However, these advancements frequently neglect the unique challenges introduced by the inherent flaws in the sensor design of event cameras in the RAW domain. Specifically, this sensor design results in the partial loss of pixel values, posing new challenges for RAW domain processes like demosaicing. The challenge intensifies as most research in the RAW domain is based on the premise that each pixel contains a value, making the straightforward adaptation of these methods to event camera demosaicing problematic. To end this, we present a Swin-Transformer-based backbone and a pixel-focus loss function for demosaicing with missing pixel values in RAW domain processing. Our core motivation is to refine a general and widely applicable foundational model from the RGB domain for RAW domain processing, thereby broadening the model&#39;s applicability within the entire imaging process. Our method harnesses multi-scale processing and space-to-depth techniques to ensure efficiency and reduce computing complexity. We also proposed the Pixel-focus Loss function for network fine-tuning to improve network convergence based on our discovery of a long-tailed distribution in training loss. Our method has undergone validation on the MIPI Demosaic Challenge dataset, with subsequent analytical experimentation confirming its efficacy. All code and trained models are released here: https://github.com/yunfanLu/ev-demosaic <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.02731v1-abstract-full').style.display = 'none'; document.getElementById('2404.02731v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for the CVPR 2024 Workshop on Mobile Intelligent Photography &amp; Imaging</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.00309">arXiv:2404.00309</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.00309">pdf</a>, <a href="https://arxiv.org/format/2404.00309">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Model-Driven Deep Learning for Distributed Detection with Binary Quantization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Wei Guo</a>, <a href="/search/eess?searchtype=author&amp;query=He%2C+M">Meng He</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+C">Chuan Huang</a>, <a href="/search/eess?searchtype=author&amp;query=He%2C+H">Hengtao He</a>, <a href="/search/eess?searchtype=author&amp;query=Song%2C+S">Shenghui Song</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+J">Jun Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Letaief%2C+K+B">Khaled B. Letaief</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.00309v1-abstract-short" style="display: inline;"> Within the realm of rapidly advancing wireless sensor networks (WSNs), distributed detection assumes a significant role in various practical applications. However, critical challenge lies in maintaining robust detection performance while operating within the constraints of limited bandwidth and energy resources. This paper introduces a novel approach that combines model-driven deep learning (DL) w&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.00309v1-abstract-full').style.display = 'inline'; document.getElementById('2404.00309v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.00309v1-abstract-full" style="display: none;"> Within the realm of rapidly advancing wireless sensor networks (WSNs), distributed detection assumes a significant role in various practical applications. However, critical challenge lies in maintaining robust detection performance while operating within the constraints of limited bandwidth and energy resources. This paper introduces a novel approach that combines model-driven deep learning (DL) with binary quantization to strike a balance between communication overhead and detection performance in WSNs. We begin by establishing the lower bound of detection error probability for distributed detection using the maximum a posteriori (MAP) criterion. Furthermore, we prove the global optimality of employing identical local quantizers across sensors, thereby maximizing the corresponding Chernoff information. Subsequently, the paper derives the minimum MAP detection error probability (MAPDEP) by inplementing identical binary probabilistic quantizers across the sensors. Moreover, the paper establishes the equivalence between utilizing all quantized data and their average as input to the detector at the fusion center (FC). In particular, we derive the Kullback-Leibler (KL) divergence, which measures the difference between the true posterior probability and output of the proposed detector. Leveraging the MAPDEP and KL divergence as loss functions, the paper proposes model-driven DL method to separately train the probability controller module in the quantizer and the detector module at the FC. Numerical results validate the convergence and effectiveness of the proposed method, which achieves near-optimal performance with reduced complexity for Gaussian hypothesis testing. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.00309v1-abstract-full').style.display = 'none'; document.getElementById('2404.00309v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.18527">arXiv:2402.18527</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.18527">pdf</a>, <a href="https://arxiv.org/format/2402.18527">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Defect Detection in Tire X-Ray Images: Conventional Methods Meet Deep Structures </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Cozma%2C+A">Andrei Cozma</a>, <a href="/search/eess?searchtype=author&amp;query=Harris%2C+L">Landon Harris</a>, <a href="/search/eess?searchtype=author&amp;query=Qi%2C+H">Hairong Qi</a>, <a href="/search/eess?searchtype=author&amp;query=Ji%2C+P">Ping Ji</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Wenpeng Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Yuan%2C+S">Song Yuan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.18527v1-abstract-short" style="display: inline;"> This paper introduces a robust approach for automated defect detection in tire X-ray images by harnessing traditional feature extraction methods such as Local Binary Pattern (LBP) and Gray Level Co-Occurrence Matrix (GLCM) features, as well as Fourier and Wavelet-based features, complemented by advanced machine learning techniques. Recognizing the challenges inherent in the complex patterns and te&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.18527v1-abstract-full').style.display = 'inline'; document.getElementById('2402.18527v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.18527v1-abstract-full" style="display: none;"> This paper introduces a robust approach for automated defect detection in tire X-ray images by harnessing traditional feature extraction methods such as Local Binary Pattern (LBP) and Gray Level Co-Occurrence Matrix (GLCM) features, as well as Fourier and Wavelet-based features, complemented by advanced machine learning techniques. Recognizing the challenges inherent in the complex patterns and textures of tire X-ray images, the study emphasizes the significance of feature engineering to enhance the performance of defect detection systems. By meticulously integrating combinations of these features with a Random Forest (RF) classifier and comparing them against advanced models like YOLOv8, the research not only benchmarks the performance of traditional features in defect detection but also explores the synergy between classical and modern approaches. The experimental results demonstrate that these traditional features, when fine-tuned and combined with machine learning models, can significantly improve the accuracy and reliability of tire defect detection, aiming to set a new standard in automated quality assurance in tire manufacturing. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.18527v1-abstract-full').style.display = 'none'; document.getElementById('2402.18527v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 2 figures, 3 tables, submitted to ICIP2024</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.4.7; I.4.9; I.4.0 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.18539">arXiv:2311.18539</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.18539">pdf</a>, <a href="https://arxiv.org/format/2311.18539">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Bridging Both Worlds in Semantics and Time: Domain Knowledge Based Analysis and Correlation of Industrial Process Attacks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Ike%2C+M">Moses Ike</a>, <a href="/search/eess?searchtype=author&amp;query=Phan%2C+K">Kandy Phan</a>, <a href="/search/eess?searchtype=author&amp;query=Badapanda%2C+A">Anwesh Badapanda</a>, <a href="/search/eess?searchtype=author&amp;query=Landen%2C+M">Matthew Landen</a>, <a href="/search/eess?searchtype=author&amp;query=Sadoski%2C+K">Keaton Sadoski</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Wanda Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Shah%2C+A">Asfahan Shah</a>, <a href="/search/eess?searchtype=author&amp;query=Zonouz%2C+S">Saman Zonouz</a>, <a href="/search/eess?searchtype=author&amp;query=Lee%2C+W">Wenke Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.18539v2-abstract-short" style="display: inline;"> Modern industrial control systems (ICS) attacks infect supervisory control and data acquisition (SCADA) hosts to stealthily alter industrial processes, causing damage. To detect attacks with low false alarms, recent work detects attacks in both SCADA and process data. Unfortunately, this led to the same problem - disjointed (false) alerts, due to the semantic and time gap in SCADA and process beha&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.18539v2-abstract-full').style.display = 'inline'; document.getElementById('2311.18539v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.18539v2-abstract-full" style="display: none;"> Modern industrial control systems (ICS) attacks infect supervisory control and data acquisition (SCADA) hosts to stealthily alter industrial processes, causing damage. To detect attacks with low false alarms, recent work detects attacks in both SCADA and process data. Unfortunately, this led to the same problem - disjointed (false) alerts, due to the semantic and time gap in SCADA and process behavior, i.e., SCADA execution does not map to process dynamics nor evolve at similar time scales. We propose BRIDGE to analyze and correlate SCADA and industrial process attacks using domain knowledge to bridge their unique semantic and time evolution. This enables operators to tie malicious SCADA operations to their adverse process effects, which reduces false alarms and improves attack understanding. BRIDGE (i) identifies process constraints violations in SCADA by measuring actuation dependencies in SCADA process-control, and (ii) detects malicious SCADA effects in processes via a physics-informed neural network that embeds generic knowledge of inertial process dynamics. BRIDGE then dynamically aligns both analysis (i and ii) in a time-window that adjusts their time evolution based on process inertial delays. We applied BRIDGE to 11 diverse real-world industrial processes, and adaptive attacks inspired by past events. BRIDGE correlated 98.3% of attacks with 0.8% false positives (FP), compared to 78.3% detection accuracy and 13.7% FP of recent work. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.18539v2-abstract-full').style.display = 'none'; document.getElementById('2311.18539v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.06879">arXiv:2310.06879</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.06879">pdf</a>, <a href="https://arxiv.org/format/2310.06879">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> The Solution for the CVPR2023 NICE Image Captioning Challenge </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wu%2C+X">Xiangyu Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Gao%2C+Y">Yi Gao</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+H">Hailiang Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+Y">Yang Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Weili Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Lu%2C+J">Jianfeng Lu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.06879v2-abstract-short" style="display: inline;"> In this paper, we present our solution to the New frontiers for Zero-shot Image Captioning Challenge. Different from the traditional image captioning datasets, this challenge includes a larger new variety of visual concepts from many domains (such as COVID-19) as well as various image types (photographs, illustrations, graphics). For the data level, we collect external training data from Laion-5B,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.06879v2-abstract-full').style.display = 'inline'; document.getElementById('2310.06879v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.06879v2-abstract-full" style="display: none;"> In this paper, we present our solution to the New frontiers for Zero-shot Image Captioning Challenge. Different from the traditional image captioning datasets, this challenge includes a larger new variety of visual concepts from many domains (such as COVID-19) as well as various image types (photographs, illustrations, graphics). For the data level, we collect external training data from Laion-5B, a large-scale CLIP-filtered image-text dataset. For the model level, we use OFA, a large-scale visual-language pre-training model based on handcrafted templates, to perform the image captioning task. In addition, we introduce contrastive learning to align image-text pairs to learn new visual concepts in the pre-training stage. Then, we propose a similarity-bucket strategy and incorporate this strategy into the template to force the model to generate higher quality and more matching captions. Finally, by retrieval-augmented strategy, we construct a content-rich template, containing the most relevant top-k captions from other image-text pairs, to guide the model in generating semantic-rich captions. Our method ranks first on the leaderboard, achieving 105.17 and 325.72 Cider-Score in the validation and test phase, respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.06879v2-abstract-full').style.display = 'none'; document.getElementById('2310.06879v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.10935">arXiv:2309.10935</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2309.10935">pdf</a>, <a href="https://arxiv.org/format/2309.10935">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> A Geometric Flow Approach for Segmentation of Images with Inhomongeneous Intensity and Missing Boundaries </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Mohapatra%2C+P">Paramjyoti Mohapatra</a>, <a href="/search/eess?searchtype=author&amp;query=Lartey%2C+R">Richard Lartey</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Weihong Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Judkovich%2C+M">Michael Judkovich</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+X">Xiaojuan Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.10935v1-abstract-short" style="display: inline;"> Image segmentation is a complex mathematical problem, especially for images that contain intensity inhomogeneity and tightly packed objects with missing boundaries in between. For instance, Magnetic Resonance (MR) muscle images often contain both of these issues, making muscle segmentation especially difficult. In this paper we propose a novel intensity correction and a semi-automatic active conto&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.10935v1-abstract-full').style.display = 'inline'; document.getElementById('2309.10935v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.10935v1-abstract-full" style="display: none;"> Image segmentation is a complex mathematical problem, especially for images that contain intensity inhomogeneity and tightly packed objects with missing boundaries in between. For instance, Magnetic Resonance (MR) muscle images often contain both of these issues, making muscle segmentation especially difficult. In this paper we propose a novel intensity correction and a semi-automatic active contour based segmentation approach. The approach uses a geometric flow that incorporates a reproducing kernel Hilbert space (RKHS) edge detector and a geodesic distance penalty term from a set of markers and anti-markers. We test the proposed scheme on MR muscle segmentation and compare with some state of the art methods. To help deal with the intensity inhomogeneity in this particular kind of image, a new approach to estimate the bias field using a fat fraction image, called Prior Bias-Corrected Fuzzy C-means (PBCFCM), is introduced. Numerical experiments show that the proposed scheme leads to significantly better results than compared ones. The average dice values of the proposed method are 92.5%, 85.3%, 85.3% for quadriceps, hamstrings and other muscle groups while other approaches are at least 10% worse. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.10935v1-abstract-full').style.display = 'none'; document.getElementById('2309.10935v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Presented at CVIT 2023 Conference. Accepted to Journal of Image and Graphics</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.01112">arXiv:2309.01112</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2309.01112">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Swing Leg Motion Strategy for Heavy-load Legged Robot Based on Force Sensing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Fu%2C+Z">Ze Fu</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+Y">Yinghui Li</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Weizhong Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.01112v1-abstract-short" style="display: inline;"> The heavy-load legged robot has strong load carrying capacity and can adapt to various unstructured terrains. But the large weight results in higher requirements for motion stability and environmental perception ability. In order to utilize force sensing information to improve its motion performance, in this paper, we propose a finite state machine model for the swing leg in the static gait by imi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.01112v1-abstract-full').style.display = 'inline'; document.getElementById('2309.01112v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.01112v1-abstract-full" style="display: none;"> The heavy-load legged robot has strong load carrying capacity and can adapt to various unstructured terrains. But the large weight results in higher requirements for motion stability and environmental perception ability. In order to utilize force sensing information to improve its motion performance, in this paper, we propose a finite state machine model for the swing leg in the static gait by imitating the movement of the elephant. Based on the presence or absence of additional terrain information, different trajectory planning strategies are provided for the swing leg to enhance the success rate of stepping and save energy. The experimental results on a novel quadruped robot show that our method has strong robustness and can enable heavy-load legged robots to pass through various complex terrains autonomously and smoothly. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.01112v1-abstract-full').style.display = 'none'; document.getElementById('2309.01112v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.08283">arXiv:2308.08283</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2308.08283">pdf</a>, <a href="https://arxiv.org/format/2308.08283">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> CARE: A Large Scale CT Image Dataset and Clinical Applicable Benchmark Model for Rectal Cancer Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+H">Hantao Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Weidong Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Qiu%2C+C">Chenyang Qiu</a>, <a href="/search/eess?searchtype=author&amp;query=Wan%2C+S">Shouhong Wan</a>, <a href="/search/eess?searchtype=author&amp;query=Zou%2C+B">Bingbing Zou</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+W">Wanqin Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Jin%2C+P">Peiquan Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.08283v1-abstract-short" style="display: inline;"> Rectal cancer segmentation of CT image plays a crucial role in timely clinical diagnosis, radiotherapy treatment, and follow-up. Although current segmentation methods have shown promise in delineating cancerous tissues, they still encounter challenges in achieving high segmentation precision. These obstacles arise from the intricate anatomical structures of the rectum and the difficulties in perfo&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.08283v1-abstract-full').style.display = 'inline'; document.getElementById('2308.08283v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.08283v1-abstract-full" style="display: none;"> Rectal cancer segmentation of CT image plays a crucial role in timely clinical diagnosis, radiotherapy treatment, and follow-up. Although current segmentation methods have shown promise in delineating cancerous tissues, they still encounter challenges in achieving high segmentation precision. These obstacles arise from the intricate anatomical structures of the rectum and the difficulties in performing differential diagnosis of rectal cancer. Additionally, a major obstacle is the lack of a large-scale, finely annotated CT image dataset for rectal cancer segmentation. To address these issues, this work introduces a novel large scale rectal cancer CT image dataset CARE with pixel-level annotations for both normal and cancerous rectum, which serves as a valuable resource for algorithm research and clinical application development. Moreover, we propose a novel medical cancer lesion segmentation benchmark model named U-SAM. The model is specifically designed to tackle the challenges posed by the intricate anatomical structures of abdominal organs by incorporating prompt information. U-SAM contains three key components: promptable information (e.g., points) to aid in target area localization, a convolution module for capturing low-level lesion details, and skip-connections to preserve and recover spatial information during the encoding-decoding process. To evaluate the effectiveness of U-SAM, we systematically compare its performance with several popular segmentation methods on the CARE dataset. The generalization of the model is further verified on the WORD dataset. Extensive experiments demonstrate that the proposed U-SAM outperforms state-of-the-art methods on these two datasets. These experiments can serve as the baseline for future research and clinical application development. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.08283v1-abstract-full').style.display = 'none'; document.getElementById('2308.08283v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.14097">arXiv:2306.14097</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.14097">pdf</a>, <a href="https://arxiv.org/format/2306.14097">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> </div> </div> <p class="title is-5 mathjax"> Interpretable Small Training Set Image Segmentation Network Originated from Multi-Grid Variational Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Meng%2C+J">Junying Meng</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Weihong Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+J">Jun Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+M">Mingrui Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.14097v1-abstract-short" style="display: inline;"> The main objective of image segmentation is to divide an image into homogeneous regions for further analysis. This is a significant and crucial task in many applications such as medical imaging. Deep learning (DL) methods have been proposed and widely used for image segmentation. However, these methods usually require a large amount of manually segmented data as training data and suffer from poor&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.14097v1-abstract-full').style.display = 'inline'; document.getElementById('2306.14097v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.14097v1-abstract-full" style="display: none;"> The main objective of image segmentation is to divide an image into homogeneous regions for further analysis. This is a significant and crucial task in many applications such as medical imaging. Deep learning (DL) methods have been proposed and widely used for image segmentation. However, these methods usually require a large amount of manually segmented data as training data and suffer from poor interpretability (known as the black box problem). The classical Mumford-Shah (MS) model is effective for segmentation and provides a piece-wise smooth approximation of the original image. In this paper, we replace the hand-crafted regularity term in the MS model with a data adaptive generalized learnable regularity term and use a multi-grid framework to unroll the MS model and obtain a variational model-based segmentation network with better generalizability and interpretability. This approach allows for the incorporation of learnable prior information into the network structure design. Moreover, the multi-grid framework enables multi-scale feature extraction and offers a mathematical explanation for the effectiveness of the U-shaped network structure in producing good image segmentation results. Due to the proposed network originates from a variational model, it can also handle small training sizes. Our experiments on the REFUGE dataset, the White Blood Cell image dataset, and 3D thigh muscle magnetic resonance (MR) images demonstrate that even with smaller training datasets, our method yields better segmentation results compared to related state of the art segmentation methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.14097v1-abstract-full').style.display = 'none'; document.getElementById('2306.14097v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">25 pages, 9 figures, 6 tables</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 94A08; 68U10 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.00303">arXiv:2306.00303</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.00303">pdf</a>, <a href="https://arxiv.org/format/2306.00303">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Sea Ice Extraction via Remote Sensed Imagery: Algorithms, Datasets, Applications and Challenges </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Yu%2C+A">Anzhu Yu</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+W">Wenjun Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+Q">Qing Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+Q">Qun Sun</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Wenyue Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Ji%2C+S">Song Ji</a>, <a href="/search/eess?searchtype=author&amp;query=Wen%2C+B">Bowei Wen</a>, <a href="/search/eess?searchtype=author&amp;query=Qiu%2C+C">Chunping Qiu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.00303v1-abstract-short" style="display: inline;"> The deep learning, which is a dominating technique in artificial intelligence, has completely changed the image understanding over the past decade. As a consequence, the sea ice extraction (SIE) problem has reached a new era. We present a comprehensive review of four important aspects of SIE, including algorithms, datasets, applications, and the future trends. Our review focuses on researches publ&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.00303v1-abstract-full').style.display = 'inline'; document.getElementById('2306.00303v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.00303v1-abstract-full" style="display: none;"> The deep learning, which is a dominating technique in artificial intelligence, has completely changed the image understanding over the past decade. As a consequence, the sea ice extraction (SIE) problem has reached a new era. We present a comprehensive review of four important aspects of SIE, including algorithms, datasets, applications, and the future trends. Our review focuses on researches published from 2016 to the present, with a specific focus on deep learning-based approaches in the last five years. We divided all relegated algorithms into 3 categories, including classical image segmentation approach, machine learning-based approach and deep learning-based methods. We reviewed the accessible ice datasets including SAR-based datasets, the optical-based datasets and others. The applications are presented in 4 aspects including climate research, navigation, geographic information systems (GIS) production and others. It also provides insightful observations and inspiring future research directions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.00303v1-abstract-full').style.display = 'none'; document.getElementById('2306.00303v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">24 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.01249">arXiv:2303.01249</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2303.01249">pdf</a>, <a href="https://arxiv.org/ps/2303.01249">ps</a>, <a href="https://arxiv.org/format/2303.01249">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Language-Universal Adapter Learning with Knowledge Distillation for End-to-End Multilingual Speech Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Shen%2C+Z">Zhijie Shen</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Wu Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.01249v1-abstract-short" style="display: inline;"> In this paper, we propose a language-universal adapter learning framework based on a pre-trained model for end-to-end multilingual automatic speech recognition (ASR). For acoustic modeling, the wav2vec 2.0 pre-trained model is fine-tuned by inserting language-specific and language-universal adapters. An online knowledge distillation is then used to enable the language-universal adapters to learn b&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.01249v1-abstract-full').style.display = 'inline'; document.getElementById('2303.01249v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.01249v1-abstract-full" style="display: none;"> In this paper, we propose a language-universal adapter learning framework based on a pre-trained model for end-to-end multilingual automatic speech recognition (ASR). For acoustic modeling, the wav2vec 2.0 pre-trained model is fine-tuned by inserting language-specific and language-universal adapters. An online knowledge distillation is then used to enable the language-universal adapters to learn both language-specific and universal features. The linguistic information confusion is also reduced by leveraging language identifiers (LIDs). With LIDs we perform a position-wise modification on the multi-head attention outputs. In the inference procedure, the language-specific adapters are removed while the language-universal adapters are kept activated. The proposed method improves the recognition accuracy and addresses the linear increase of the number of adapters&#39; parameters with the number of languages in common multilingual ASR systems. Experiments on the BABEL dataset confirm the effectiveness of the proposed framework. Compared to the conventional multilingual model, a 3.3% absolute error rate reduction is achieved. The code is available at: https://github.com/shen9712/UniversalAdapterLearning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.01249v1-abstract-full').style.display = 'none'; document.getElementById('2303.01249v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.12428">arXiv:2302.12428</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2302.12428">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> A holistically 3D-printed flexible millimeter-wave Doppler radar: Towards fully printed high-frequency multilayer flexible hybrid electronics systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Tang%2C+H">Hong Tang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Y">Yingjie Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Zheng%2C+B">Bowen Zheng</a>, <a href="/search/eess?searchtype=author&amp;query=An%2C+S">Sensong An</a>, <a href="/search/eess?searchtype=author&amp;query=Haerinia%2C+M">Mohammad Haerinia</a>, <a href="/search/eess?searchtype=author&amp;query=Dong%2C+Y">Yunxi Dong</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+Y">Yi Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Wei Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+H">Hualiang Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.12428v1-abstract-short" style="display: inline;"> Flexible hybrid electronics (FHE) is an emerging technology enabled through the integration of advanced semiconductor devices and 3D printing technology. It unlocks tremendous market potential by realizing low-cost flexible circuits and systems that can be conformally integrated into various applications. However, the operating frequencies of most reported FHE systems are relatively low. It is als&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.12428v1-abstract-full').style.display = 'inline'; document.getElementById('2302.12428v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.12428v1-abstract-full" style="display: none;"> Flexible hybrid electronics (FHE) is an emerging technology enabled through the integration of advanced semiconductor devices and 3D printing technology. It unlocks tremendous market potential by realizing low-cost flexible circuits and systems that can be conformally integrated into various applications. However, the operating frequencies of most reported FHE systems are relatively low. It is also worth to note that reported FHE systems have been limited to relatively simple design concept (since complex systems will impose challenges in aspects such as multilayer interconnections, printing materials, and bonding layers). Here, we report a fully 3D-printed flexible four-layer millimeter-wave Doppler radar (i.e., a millimeter-wave FHE system). The sensing performance and flexibility of the 3D-printed radar are characterized and validated by general field tests and bending tests, respectively. Our results demonstrate the feasibility of developing fully 3D-printed high-frequency multilayer FHE, which can be conformally integrated into irregular surfaces (e.g., vehicle bumpers) for applications such as vehicle radars and wearable electronics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.12428v1-abstract-full').style.display = 'none'; document.getElementById('2302.12428v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 78-05 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.02147">arXiv:2211.02147</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2211.02147">pdf</a>, <a href="https://arxiv.org/format/2211.02147">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.engappai.2024.108911">10.1016/j.engappai.2024.108911 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> A Survey on Reinforcement Learning in Aviation Applications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Razzaghi%2C+P">Pouria Razzaghi</a>, <a href="/search/eess?searchtype=author&amp;query=Tabrizian%2C+A">Amin Tabrizian</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Wei Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+S">Shulu Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Taye%2C+A">Abenezer Taye</a>, <a href="/search/eess?searchtype=author&amp;query=Thompson%2C+E">Ellis Thompson</a>, <a href="/search/eess?searchtype=author&amp;query=Bregeon%2C+A">Alexis Bregeon</a>, <a href="/search/eess?searchtype=author&amp;query=Baheri%2C+A">Ali Baheri</a>, <a href="/search/eess?searchtype=author&amp;query=Wei%2C+P">Peng Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.02147v3-abstract-short" style="display: inline;"> Compared with model-based control and optimization methods, reinforcement learning (RL) provides a data-driven, learning-based framework to formulate and solve sequential decision-making problems. The RL framework has become promising due to largely improved data availability and computing power in the aviation industry. Many aviation-based applications can be formulated or treated as sequential d&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.02147v3-abstract-full').style.display = 'inline'; document.getElementById('2211.02147v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.02147v3-abstract-full" style="display: none;"> Compared with model-based control and optimization methods, reinforcement learning (RL) provides a data-driven, learning-based framework to formulate and solve sequential decision-making problems. The RL framework has become promising due to largely improved data availability and computing power in the aviation industry. Many aviation-based applications can be formulated or treated as sequential decision-making problems. Some of them are offline planning problems, while others need to be solved online and are safety-critical. In this survey paper, we first describe standard RL formulations and solutions. Then we survey the landscape of existing RL-based applications in aviation. Finally, we summarize the paper, identify the technical gaps, and suggest future directions of RL research in aviation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.02147v3-abstract-full').style.display = 'none'; document.getElementById('2211.02147v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Engineering Applications of Artificial Intelligence Volume 136, Part A Engineering Applications of Artificial Intelligence, Vol 136, October 2024, 108911 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.10955">arXiv:2206.10955</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2206.10955">pdf</a>, <a href="https://arxiv.org/format/2206.10955">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Adversarial Reconfigurable Intelligent Surface Against Physical Layer Key Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wei%2C+Z">Zhuangkun Wei</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+B">Bin Li</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Weisi Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.10955v2-abstract-short" style="display: inline;"> The development of reconfigurable intelligent surfaces (RIS) has recently advanced the research of physical layer security (PLS). Beneficial impacts of RIS include but are not limited to offering a new degree-of-freedom (DoF) for key-less PLS optimization, and increasing channel randomness for physical layer secret key generation (PL-SKG). However, there is a lack of research studying how adversar&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.10955v2-abstract-full').style.display = 'inline'; document.getElementById('2206.10955v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.10955v2-abstract-full" style="display: none;"> The development of reconfigurable intelligent surfaces (RIS) has recently advanced the research of physical layer security (PLS). Beneficial impacts of RIS include but are not limited to offering a new degree-of-freedom (DoF) for key-less PLS optimization, and increasing channel randomness for physical layer secret key generation (PL-SKG). However, there is a lack of research studying how adversarial RIS can be used to attack and obtain legitimate secret keys generated by PL-SKG. In this work, we show an Eve-controlled adversarial RIS (Eve-RIS), by inserting into the legitimate channel a random and reciprocal channel, can partially reconstruct the secret keys from the legitimate PL-SKG process. To operationalize this concept, we design Eve-RIS schemes against two PL-SKG techniques used: (i) the CSI-based PL-SKG, and (ii) the two-way cross multiplication based PL-SKG. The channel probing at Eve-RIS is realized by compressed sensing designs with a small number of radio-frequency (RF) chains. Then, the optimal RIS phase is obtained by maximizing the Eve-RIS inserted deceiving channel. Our analysis and results show that even with a passive RIS, our proposed Eve-RIS can achieve a high key match rate with legitimate users, and is resistant to most of the current defensive approaches. This means the novel Eve-RIS provides a new eavesdropping threat on PL-SKG, which can spur new research areas to counter adversarial RIS attacks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.10955v2-abstract-full').style.display = 'none'; document.getElementById('2206.10955v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.09316">arXiv:2205.09316</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2205.09316">pdf</a>, <a href="https://arxiv.org/format/2205.09316">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Dynamic Clustering and Power Control for Two-Tier Wireless Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Wei Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+C">Chuan Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Qin%2C+X">Xiaoqi Qin</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+L">Lian Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+W">Wei Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.09316v1-abstract-short" style="display: inline;"> Federated learning (FL) has been recognized as a promising distributed learning paradigm to support intelligent applications at the wireless edge, where a global model is trained iteratively through the collaboration of the edge devices without sharing their data. However, due to the relatively large communication cost between the devices and parameter server (PS), direct computing based on the in&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.09316v1-abstract-full').style.display = 'inline'; document.getElementById('2205.09316v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.09316v1-abstract-full" style="display: none;"> Federated learning (FL) has been recognized as a promising distributed learning paradigm to support intelligent applications at the wireless edge, where a global model is trained iteratively through the collaboration of the edge devices without sharing their data. However, due to the relatively large communication cost between the devices and parameter server (PS), direct computing based on the information from the devices may not be resource efficient. This paper studies the joint communication and learning design for the over-the-air computation (AirComp)-based two-tier wireless FL scheme, where the lead devices first collect the local gradients from their nearby subordinate devices, and then send the merged results to the PS for the second round of aggregation. We establish a convergence result for the proposed scheme and derive the upper bound on the optimality gap between the expected and optimal global loss values. Next, based on the device distance and data importance, we propose a hierarchical clustering method to build the two-tier structure. Then, with only the instantaneous channel state information (CSI), we formulate the optimality gap minimization problem and solve it by using an efficient alternating minimization method. Numerical results show that the proposed scheme outperforms the baseline ones. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.09316v1-abstract-full').style.display = 'none'; document.getElementById('2205.09316v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.09306">arXiv:2205.09306</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2205.09306">pdf</a>, <a href="https://arxiv.org/format/2205.09306">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Joint Device Selection and Power Control for Wireless Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Wei Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+R">Ran Li</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+C">Chuan Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Qin%2C+X">Xiaoqi Qin</a>, <a href="/search/eess?searchtype=author&amp;query=Shen%2C+K">Kaiming Shen</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+W">Wei Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.09306v1-abstract-short" style="display: inline;"> This paper studies the joint device selection and power control scheme for wireless federated learning (FL), considering both the downlink and uplink communications between the parameter server (PS) and the terminal devices. In each round of model training, the PS first broadcasts the global model to the terminal devices in an analog fashion, and then the terminal devices perform local training an&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.09306v1-abstract-full').style.display = 'inline'; document.getElementById('2205.09306v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.09306v1-abstract-full" style="display: none;"> This paper studies the joint device selection and power control scheme for wireless federated learning (FL), considering both the downlink and uplink communications between the parameter server (PS) and the terminal devices. In each round of model training, the PS first broadcasts the global model to the terminal devices in an analog fashion, and then the terminal devices perform local training and upload the updated model parameters to the PS via over-the-air computation (AirComp). First, we propose an AirComp-based adaptive reweighing scheme for the aggregation of local updated models, where the model aggregation weights are directly determined by the uplink transmit power values of the selected devices and which enables the joint learning and communication optimization simply by the device selection and power control. Furthermore, we provide a convergence analysis for the proposed wireless FL algorithm and the upper bound on the expected optimality gap between the expected and optimal global loss values is derived. With instantaneous channel state information (CSI), we formulate the optimality gap minimization problems under both the individual and sum uplink transmit power constraints, respectively, which are shown to be solved by the semidefinite programming (SDR) technique. Numerical results reveal that our proposed wireless FL algorithm achieves close to the best performance by using the ideal FedAvg scheme with error-free model exchange and full device participation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.09306v1-abstract-full').style.display = 'none'; document.getElementById('2205.09306v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.00581">arXiv:2205.00581</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2205.00581">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Using a novel fractional-order gradient method for CNN back-propagation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Taresh%2C+M+M">Mundher Mohammed Taresh</a>, <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+N">Ningbo Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Ali%2C+T+A+A">Talal Ahmed Ali Ali</a>, <a href="/search/eess?searchtype=author&amp;query=Alghaili%2C+M">Mohammed Alghaili</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Weihua Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.00581v1-abstract-short" style="display: inline;"> Computer-aided diagnosis tools have experienced rapid growth and development in recent years. Among all, deep learning is the most sophisticated and popular tool. In this paper, researchers propose a novel deep learning model and apply it to COVID-19 diagnosis. Our model uses the tool of fractional calculus, which has the potential to improve the performance of gradient methods. To this end, the r&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.00581v1-abstract-full').style.display = 'inline'; document.getElementById('2205.00581v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.00581v1-abstract-full" style="display: none;"> Computer-aided diagnosis tools have experienced rapid growth and development in recent years. Among all, deep learning is the most sophisticated and popular tool. In this paper, researchers propose a novel deep learning model and apply it to COVID-19 diagnosis. Our model uses the tool of fractional calculus, which has the potential to improve the performance of gradient methods. To this end, the researcher proposes a fractional-order gradient method for the back-propagation of convolutional neural networks based on the Caputo definition. However, if only the first term of the infinite series of the Caputo definition is used to approximate the fractional-order derivative, the length of the memory is truncated. Therefore, the fractional-order gradient (FGD) method with a fixed memory step and an adjustable number of terms is used to update the weights of the layers. Experiments were performed on the COVIDx dataset to demonstrate fast convergence, good accuracy, and the ability to bypass the local optimal point. We also compared the performance of the developed fractional-order neural networks and Integer-order neural networks. The results confirmed the effectiveness of our proposed model in the diagnosis of COVID-19. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.00581v1-abstract-full').style.display = 'none'; document.getElementById('2205.00581v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 6 figuers</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> D.1.2; F.3.1; F.4.1 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> F.2.2, I.2.7 K.5 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2201.07210">arXiv:2201.07210</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2201.07210">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Efficient Training of Spiking Neural Networks with Temporally-Truncated Local Backpropagation through Time </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Wenzhe Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Fouda%2C+M+E">Mohammed E. Fouda</a>, <a href="/search/eess?searchtype=author&amp;query=Eltawil%2C+A+M">Ahmed M. Eltawil</a>, <a href="/search/eess?searchtype=author&amp;query=Salama%2C+K+N">Khaled Nabil Salama</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2201.07210v1-abstract-short" style="display: inline;"> Directly training spiking neural networks (SNNs) has remained challenging due to complex neural dynamics and intrinsic non-differentiability in firing functions. The well-known backpropagation through time (BPTT) algorithm proposed to train SNNs suffers from large memory footprint and prohibits backward and update unlocking, making it impossible to exploit the potential of locally-supervised train&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.07210v1-abstract-full').style.display = 'inline'; document.getElementById('2201.07210v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2201.07210v1-abstract-full" style="display: none;"> Directly training spiking neural networks (SNNs) has remained challenging due to complex neural dynamics and intrinsic non-differentiability in firing functions. The well-known backpropagation through time (BPTT) algorithm proposed to train SNNs suffers from large memory footprint and prohibits backward and update unlocking, making it impossible to exploit the potential of locally-supervised training methods. This work proposes an efficient and direct training algorithm for SNNs that integrates a locally-supervised training method with a temporally-truncated BPTT algorithm. The proposed algorithm explores both temporal and spatial locality in BPTT and contributes to significant reduction in computational cost including GPU memory utilization, main memory access and arithmetic operations. We thoroughly explore the design space concerning temporal truncation length and local training block size and benchmark their impact on classification accuracy of different networks running different types of tasks. The results reveal that temporal truncation has a negative effect on the accuracy of classifying frame-based datasets, but leads to improvement in accuracy on dynamic-vision-sensor (DVS) recorded datasets. In spite of resulting information loss, local training is capable of alleviating overfitting. The combined effect of temporal truncation and local training can lead to the slowdown of accuracy drop and even improvement in accuracy. In addition, training deep SNNs models such as AlexNet classifying CIFAR10-DVS dataset leads to 7.26% increase in accuracy, 89.94% reduction in GPU memory, 10.79% reduction in memory access, and 99.64% reduction in MAC operations compared to the standard end-to-end BPTT. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.07210v1-abstract-full').style.display = 'none'; document.getElementById('2201.07210v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2111.00428">arXiv:2111.00428</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2111.00428">pdf</a>, <a href="https://arxiv.org/format/2111.00428">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Reconfigurable Intelligent Surface-induced Randomness for mmWave Key Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Yang%2C+S">Shubo Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Han%2C+H">Han Han</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+Y">Yihong Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Weisi Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Pang%2C+Z">Zhibo Pang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+L">Lei Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2111.00428v2-abstract-short" style="display: inline;"> Secret key generation in physical layer security exploits the unpredictable random nature of wireless channels. The millimeter-wave (mmWave) channels have limited multipath and channel randomness in static environments. In this paper, for mmWave secret key generation of physical layer security, we use a reconfigurable intelligent surface (RIS) to induce randomness directly in wireless environments&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.00428v2-abstract-full').style.display = 'inline'; document.getElementById('2111.00428v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2111.00428v2-abstract-full" style="display: none;"> Secret key generation in physical layer security exploits the unpredictable random nature of wireless channels. The millimeter-wave (mmWave) channels have limited multipath and channel randomness in static environments. In this paper, for mmWave secret key generation of physical layer security, we use a reconfigurable intelligent surface (RIS) to induce randomness directly in wireless environments, without adding complexity to transceivers. We consider RIS to have continuous individual phase shifts (CIPS) and derive the RIS-assisted reflection channel distribution with its parameters. Then, we propose continuous group phase shifts (CGPS) to increase the randomness specifically at legal parties. Since the continuous phase shifts are expensive to implement, we analyze discrete individual phase shifts (DIPS) and derive the corresponding channel distribution, which is dependent on the quantization bit. We then derive the secret key rate (SKR) to evaluate the randomness performance. With the simulation results verifying the analytical results, this work explains the mathematical principles and lays a foundation for future mmWave evaluation and optimization of artificial channel randomness. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.00428v2-abstract-full').style.display = 'none'; document.getElementById('2111.00428v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Add contents, including continuous group phase shifts and secret key rate analysis</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.12785">arXiv:2110.12785</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2110.12785">pdf</a>, <a href="https://arxiv.org/format/2110.12785">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Random Matrix based Physical Layer Secret Key Generation in Static Channels </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wei%2C+Z">Zhuangkun Wei</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Weisi Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.12785v1-abstract-short" style="display: inline;"> Physical layer secret key generation exploits the reciprocal channel randomness for key generation and has proven to be an effective addition security layer in wireless communications. However, static or scarcely random channels require artificially induced dynamics to improve the secrecy performance, e.g., using intelligent reflecting surface (IRS). One key challenge is that the induced random ph&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.12785v1-abstract-full').style.display = 'inline'; document.getElementById('2110.12785v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.12785v1-abstract-full" style="display: none;"> Physical layer secret key generation exploits the reciprocal channel randomness for key generation and has proven to be an effective addition security layer in wireless communications. However, static or scarcely random channels require artificially induced dynamics to improve the secrecy performance, e.g., using intelligent reflecting surface (IRS). One key challenge is that the induced random phase from IRS is also reflected in the direction to eavesdroppers (Eve). This leakage enables Eve nodes to estimate the legitimate channels and secret key via a globally known pilot sequence. To mitigate the secret key leakage issue, we propose to exploit random matrix theory to inform the design of a new physical layer secret key generation (PL-SKG) algorithm. We prove that, when sending appropriate random Gaussian matrices, the singular values of Alice&#39;s and Bob&#39;s received signals follow a similar probability distribution. Leveraging these common singular values, we propose a random Gaussian matrix based PL-SKG (RGM PL-SKG), which avoids the usages of the globally known pilot and thereby prevents the aforementioned leakage issue. Our results show the following: (i) high noise resistance which leads to superior secret key rate (SKR) improvement (up to 300%) in low SNR regime, and (ii) general improved SKR performance against multiple colluded Eves. We believe our combination of random matrix theory and PL-SKG shows a new paradigm to secure the wireless communication channels. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.12785v1-abstract-full').style.display = 'none'; document.getElementById('2110.12785v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.10435">arXiv:2110.10435</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2110.10435">pdf</a>, <a href="https://arxiv.org/format/2110.10435">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> RSS-based Multiple Sources Localization with Unknown Log-normal Shadow Fading </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Chu%2C+Y">Yueyan Chu</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Wenbin Guo</a>, <a href="/search/eess?searchtype=author&amp;query=You%2C+K">Kangyong You</a>, <a href="/search/eess?searchtype=author&amp;query=Zhao%2C+L">Lei Zhao</a>, <a href="/search/eess?searchtype=author&amp;query=Peng%2C+T">Tao Peng</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+W">Wenbo Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.10435v1-abstract-short" style="display: inline;"> Multi-source localization based on received signal strength (RSS) has drawn great interest in wireless sensor networks. However, the shadow fading term caused by obstacles cannot be separated from the received signal, which leads to severe error in location estimate. In this paper, we approximate the log-normal sum distribution through Fenton-Wilkinson method to formulate a non-convex maximum like&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.10435v1-abstract-full').style.display = 'inline'; document.getElementById('2110.10435v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.10435v1-abstract-full" style="display: none;"> Multi-source localization based on received signal strength (RSS) has drawn great interest in wireless sensor networks. However, the shadow fading term caused by obstacles cannot be separated from the received signal, which leads to severe error in location estimate. In this paper, we approximate the log-normal sum distribution through Fenton-Wilkinson method to formulate a non-convex maximum likelihood (ML) estimator with unknown shadow fading factor. In order to overcome the difficulty in solving the non-convex problem, we propose a novel algorithm to estimate the locations of sources. Specifically, the region is divided into $N$ grids firstly, and the multi-source localization is converted into a sparse recovery problem so that we can obtain the sparse solution. Then we utilize the K-means clustering method to obtain the rough locations of the off-grid sources as the initial feasible point of the ML estimator. Finally, an iterative refinement of the estimated locations is proposed by dynamic updating of the localization dictionary. The proposed algorithm can efficiently approach a superior local optimal solution of the ML estimator. It is shown from the simulation results that the proposed method has a promising localization performance and improves the robustness for multi-source localization in unknown shadow fading environments. Moreover, the proposed method provides a better computational complexity from $O(K^3N^3)$ to $O(N^3)$. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.10435v1-abstract-full').style.display = 'none'; document.getElementById('2110.10435v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 10 figures. arXiv admin note: substantial text overlap with arXiv:2105.15097</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.08637">arXiv:2106.08637</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2106.08637">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Topic Classification on Spoken Documents Using Deep Acoustic and Linguistic Features </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Liu%2C+T">Tan Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Wu Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.08637v1-abstract-short" style="display: inline;"> Topic classification systems on spoken documents usually consist of two modules: an automatic speech recognition (ASR) module to convert speech into text and a text topic classification (TTC) module to predict the topic class from the decoded text. In this paper, instead of using the ASR transcripts, the fusion of deep acoustic and linguistic features is used for topic classification on spoken doc&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.08637v1-abstract-full').style.display = 'inline'; document.getElementById('2106.08637v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.08637v1-abstract-full" style="display: none;"> Topic classification systems on spoken documents usually consist of two modules: an automatic speech recognition (ASR) module to convert speech into text and a text topic classification (TTC) module to predict the topic class from the decoded text. In this paper, instead of using the ASR transcripts, the fusion of deep acoustic and linguistic features is used for topic classification on spoken documents. More specifically, a conventional CTC-based acoustic model (AM) using phonemes as output units is first trained, and the outputs of the layer before the linear phoneme classifier in the trained AM are used as the deep acoustic features of spoken documents. Furthermore, these deep acoustic features are fed to a phoneme-to-word (P2W) module to obtain deep linguistic features. Finally, a local multi-head attention module is proposed to fuse these two types of deep features for topic classification. Experiments conducted on a subset selected from Switchboard corpus show that our proposed framework outperforms the conventional ASR+TTC systems and achieves a 3.13% improvement in ACC. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.08637v1-abstract-full').style.display = 'none'; document.getElementById('2106.08637v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2105.15097">arXiv:2105.15097</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2105.15097">pdf</a>, <a href="https://arxiv.org/format/2105.15097">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Multiple Sources Localization with Sparse Recovery under Log-normal Shadow Fading </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Chu%2C+Y">Yueyan Chu</a>, <a href="/search/eess?searchtype=author&amp;query=You%2C+K">Kangyong You</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Wenbin Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2105.15097v1-abstract-short" style="display: inline;"> Localization based on received signal strength (RSS) has drawn great interest in the wireless sensor network (WSN). In this paper, we investigate the RSS-based multi-sources localization problem with unknown transmitted power under shadow fading. The log-normal shadowing effect is approximated through Fenton-Wilkinson (F-W) method and maximum likelihood estimation is adopted to optimize the RSS-ba&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.15097v1-abstract-full').style.display = 'inline'; document.getElementById('2105.15097v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2105.15097v1-abstract-full" style="display: none;"> Localization based on received signal strength (RSS) has drawn great interest in the wireless sensor network (WSN). In this paper, we investigate the RSS-based multi-sources localization problem with unknown transmitted power under shadow fading. The log-normal shadowing effect is approximated through Fenton-Wilkinson (F-W) method and maximum likelihood estimation is adopted to optimize the RSS-based multiple sources localization problem. Moreover, we exploit a sparse recovery and weighted average of candidates (SR-WAC) based method to set up an initiation, which can efficiently approach a superior local optimal solution. It is shown from the simulation results that the proposed method has a much higher localization accuracy and outperforms the other <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.15097v1-abstract-full').style.display = 'none'; document.getElementById('2105.15097v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2104.00230">arXiv:2104.00230</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2104.00230">pdf</a>, <a href="https://arxiv.org/format/2104.00230">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Bidirectional Multiscale Feature Aggregation for Speaker Verification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Qi%2C+J">Jiajun Qi</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Wu Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2104.00230v1-abstract-short" style="display: inline;"> In this paper, we propose a novel bidirectional multiscale feature aggregation (BMFA) network with attentional fusion modules for text-independent speaker verification. The feature maps from different stages of the backbone network are iteratively combined and refined in both a bottom-up and top-down manner. Furthermore, instead of simple concatenation or element-wise addition of feature maps from&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.00230v1-abstract-full').style.display = 'inline'; document.getElementById('2104.00230v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2104.00230v1-abstract-full" style="display: none;"> In this paper, we propose a novel bidirectional multiscale feature aggregation (BMFA) network with attentional fusion modules for text-independent speaker verification. The feature maps from different stages of the backbone network are iteratively combined and refined in both a bottom-up and top-down manner. Furthermore, instead of simple concatenation or element-wise addition of feature maps from different stages, an attentional fusion module is designed to compute the fusion weights. Experiments are conducted on the NIST SRE16 and VoxCeleb1 datasets. The experimental results demonstrate the effectiveness of the bidirectional aggregation strategy and show that the proposed attentional fusion module can further improve the performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.00230v1-abstract-full').style.display = 'none'; document.getElementById('2104.00230v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.15421">arXiv:2103.15421</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2103.15421">pdf</a>, <a href="https://arxiv.org/format/2103.15421">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Improved Meta-Learning Training for Speaker Verification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Chen%2C+Y">Yafeng Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Wu Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.15421v2-abstract-short" style="display: inline;"> Meta-learning has recently become a research hotspot in speaker verification (SV). We introduce two methods to improve the meta-learning training for SV in this paper. For the first method, a backbone embedding network is first jointly trained with the conventional cross entropy loss and prototypical networks (PN) loss. Then, inspired by speaker adaptive training in speech recognition, additional&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.15421v2-abstract-full').style.display = 'inline'; document.getElementById('2103.15421v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.15421v2-abstract-full" style="display: none;"> Meta-learning has recently become a research hotspot in speaker verification (SV). We introduce two methods to improve the meta-learning training for SV in this paper. For the first method, a backbone embedding network is first jointly trained with the conventional cross entropy loss and prototypical networks (PN) loss. Then, inspired by speaker adaptive training in speech recognition, additional transformation coefficients are trained with only the PN loss. The transformation coefficients are used to modify the original backbone embedding network in the x-vector extraction process. Furthermore, the random erasing data augmentation technique is applied to all support samples in each episode to construct positive pairs, and a contrastive loss between the augmented and the original support samples is added to the objective in model training. Experiments are carried out on the SITW and VOiCES databases. Both of the methods can obtain consistent improvements over existing meta-learning training frameworks. By combining these two methods, we can observe further improvements on these two databases. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.15421v2-abstract-full').style.display = 'none'; document.getElementById('2103.15421v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2011.12722">arXiv:2011.12722</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2011.12722">pdf</a>, <a href="https://arxiv.org/format/2011.12722">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Attention Aware Cost Volume Pyramid Based Multi-view Stereo Network for 3D Reconstruction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Yu%2C+A">Anzhu Yu</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Wenyue Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+B">Bing Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+X">Xin Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+X">Xin Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Cao%2C+X">Xuefeng Cao</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+B">Bingchuan Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2011.12722v1-abstract-short" style="display: inline;"> We present an efficient multi-view stereo (MVS) network for 3D reconstruction from multiview images. While previous learning based reconstruction approaches performed quite well, most of them estimate depth maps at a fixed resolution using plane sweep volumes with a fixed depth hypothesis at each plane, which requires densely sampled planes for desired accuracy and therefore is difficult to achiev&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2011.12722v1-abstract-full').style.display = 'inline'; document.getElementById('2011.12722v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2011.12722v1-abstract-full" style="display: none;"> We present an efficient multi-view stereo (MVS) network for 3D reconstruction from multiview images. While previous learning based reconstruction approaches performed quite well, most of them estimate depth maps at a fixed resolution using plane sweep volumes with a fixed depth hypothesis at each plane, which requires densely sampled planes for desired accuracy and therefore is difficult to achieve high resolution depth maps. In this paper we introduce a coarseto-fine depth inference strategy to achieve high resolution depth. This strategy estimates the depth map at coarsest level, while the depth maps at finer levels are considered as the upsampled depth map from previous level with pixel-wise depth residual. Thus, we narrow the depth searching range with priori information from previous level and construct new cost volumes from the pixel-wise depth residual to perform depth map refinement. Then the final depth map could be achieved iteratively since all the parameters are shared between different levels. At each level, the self-attention layer is introduced to the feature extraction block for capturing the long range dependencies for depth inference task, and the cost volume is generated using similarity measurement instead of the variance based methods used in previous work. Experiments were conducted on both the DTU benchmark dataset and recently released BlendedMVS dataset. The results demonstrated that our model could outperform most state-of-the-arts (SOTA) methods. The codebase of this project is at https://github.com/ArthasMil/AACVP-MVSNet. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2011.12722v1-abstract-full').style.display = 'none'; document.getElementById('2011.12722v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 November, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.10919">arXiv:2010.10919</a> <span>&nbsp;&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Multi-task Metric Learning for Text-independent Speaker Verification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Chen%2C+Y">Yafeng Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Wu Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+J">Jingjing Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Qi%2C+J">Jiajun Qi</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+T">Tan Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.10919v2-abstract-short" style="display: inline;"> In this work, we introduce metric learning (ML) to enhance the deep embedding learning for text-independent speaker verification (SV). Specifically, the deep speaker embedding network is trained with conventional cross entropy loss and auxiliary pair-based ML loss function. For the auxiliary ML task, training samples of a mini-batch are first arranged into pairs, then positive and negative pairs a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.10919v2-abstract-full').style.display = 'inline'; document.getElementById('2010.10919v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.10919v2-abstract-full" style="display: none;"> In this work, we introduce metric learning (ML) to enhance the deep embedding learning for text-independent speaker verification (SV). Specifically, the deep speaker embedding network is trained with conventional cross entropy loss and auxiliary pair-based ML loss function. For the auxiliary ML task, training samples of a mini-batch are first arranged into pairs, then positive and negative pairs are selected and weighted through their own and relative similarities, and finally the auxiliary ML loss is calculated by the similarity of the selected pairs. To evaluate the proposed method, we conduct experiments on the Speaker in the Wild (SITW) dataset. The results demonstrate the effectiveness of the proposed method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.10919v2-abstract-full').style.display = 'none'; document.getElementById('2010.10919v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Not a particularly high-quality work, so we request withdrawal</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.10163">arXiv:2010.10163</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2010.10163">pdf</a>, <a href="https://arxiv.org/format/2010.10163">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Claw U-Net: A Unet-based Network with Deep Feature Concatenation for Scleral Blood Vessel Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Yao%2C+C">Chang Yao</a>, <a href="/search/eess?searchtype=author&amp;query=Tang%2C+J">Jingyu Tang</a>, <a href="/search/eess?searchtype=author&amp;query=Hu%2C+M">Menghan Hu</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+Y">Yue Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Wenyi Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+Q">Qingli Li</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+X">Xiao-Ping Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.10163v1-abstract-short" style="display: inline;"> Sturge-Weber syndrome (SWS) is a vascular malformation disease, and it may cause blindness if the patient&#39;s condition is severe. Clinical results show that SWS can be divided into two types based on the characteristics of scleral blood vessels. Therefore, how to accurately segment scleral blood vessels has become a significant problem in computer-aided diagnosis. In this research, we propose to co&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.10163v1-abstract-full').style.display = 'inline'; document.getElementById('2010.10163v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.10163v1-abstract-full" style="display: none;"> Sturge-Weber syndrome (SWS) is a vascular malformation disease, and it may cause blindness if the patient&#39;s condition is severe. Clinical results show that SWS can be divided into two types based on the characteristics of scleral blood vessels. Therefore, how to accurately segment scleral blood vessels has become a significant problem in computer-aided diagnosis. In this research, we propose to continuously upsample the bottom layer&#39;s feature maps to preserve image details, and design a novel Claw UNet based on UNet for scleral blood vessel segmentation. Specifically, the residual structure is used to increase the number of network layers in the feature extraction stage to learn deeper features. In the decoding stage, by fusing the features of the encoding, upsampling, and decoding parts, Claw UNet can achieve effective segmentation in the fine-grained regions of scleral blood vessels. To effectively extract small blood vessels, we use the attention mechanism to calculate the attention coefficient of each position in images. Claw UNet outperforms other UNet-based networks on scleral blood vessel image dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.10163v1-abstract-full').style.display = 'none'; document.getElementById('2010.10163v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages,4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.06248">arXiv:2010.06248</a> <span>&nbsp;&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Exploring Universal Speech Attributes for Speaker Verification with an Improved Cross-stitch Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Qi%2C+J">Jiajun Qi</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Wu Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+J">Jingjing Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+Y">Yafeng Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+T">Tan Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.06248v3-abstract-short" style="display: inline;"> The universal speech attributes for x-vector based speaker verification (SV) are addressed in this paper. The manner and place of articulation form the fundamental speech attribute unit (SAU), and then new speech attribute (NSA) units for acoustic modeling are generated by tied tri-SAU states. An improved cross-stitch network is adopted as a multitask learning (MTL) framework for integrating these&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.06248v3-abstract-full').style.display = 'inline'; document.getElementById('2010.06248v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.06248v3-abstract-full" style="display: none;"> The universal speech attributes for x-vector based speaker verification (SV) are addressed in this paper. The manner and place of articulation form the fundamental speech attribute unit (SAU), and then new speech attribute (NSA) units for acoustic modeling are generated by tied tri-SAU states. An improved cross-stitch network is adopted as a multitask learning (MTL) framework for integrating these universal speech attributes into the x-vector network training process. Experiments are conducted on common condition 5 (CC5) of the core-core and the 10 s-10 s tests of the NIST SRE10 evaluation set, and the proposed algorithm can achieve consistent improvements over the baseline x-vector on both these tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.06248v3-abstract-full').style.display = 'none'; document.getElementById('2010.06248v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Not a particularly high-quality work, so we request withdrawal</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2007.13290">arXiv:2007.13290</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2007.13290">pdf</a>, <a href="https://arxiv.org/ps/2007.13290">ps</a>, <a href="https://arxiv.org/format/2007.13290">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.sigpro.2020.107729">10.1016/j.sigpro.2020.107729 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Deep Learning Methods for Solving Linear Inverse Problems: Research Directions and Paradigms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Bai%2C+Y">Yanna Bai</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+W">Wei Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+J">Jie Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Weisi Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2007.13290v2-abstract-short" style="display: inline;"> The linear inverse problem is fundamental to the development of various scientific areas. Innumerable attempts have been carried out to solve different variants of the linear inverse problem in different applications. Nowadays, the rapid development of deep learning provides a fresh perspective for solving the linear inverse problem, which has various well-designed network architectures results in&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.13290v2-abstract-full').style.display = 'inline'; document.getElementById('2007.13290v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2007.13290v2-abstract-full" style="display: none;"> The linear inverse problem is fundamental to the development of various scientific areas. Innumerable attempts have been carried out to solve different variants of the linear inverse problem in different applications. Nowadays, the rapid development of deep learning provides a fresh perspective for solving the linear inverse problem, which has various well-designed network architectures results in state-of-the-art performance in many applications. In this paper, we present a comprehensive survey of the recent progress in the development of deep learning for solving various linear inverse problems. We review how deep learning methods are used in solving different linear inverse problems, and explore the structured neural network architectures that incorporate knowledge used in traditional methods. Furthermore, we identify open challenges and potential future directions along this research line. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.13290v2-abstract-full').style.display = 'none'; document.getElementById('2007.13290v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 August, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 July, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">60 pages; Publication in (Elsevier) Signal Processing, 2020</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Signal Processing, 2020 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2007.01628">arXiv:2007.01628</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2007.01628">pdf</a>, <a href="https://arxiv.org/format/2007.01628">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TIP.2021.3064433">10.1109/TIP.2021.3064433 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> HDR-GAN: HDR Image Reconstruction from Multi-Exposed LDR Images with Large Motions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Niu%2C+Y">Yuzhen Niu</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+J">Jianbin Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+W">Wenxi Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Wenzhong Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Lau%2C+R+W+H">Rynson W. H. Lau</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2007.01628v1-abstract-short" style="display: inline;"> Synthesizing high dynamic range (HDR) images from multiple low-dynamic range (LDR) exposures in dynamic scenes is challenging. There are two major problems caused by the large motions of foreground objects. One is the severe misalignment among the LDR images. The other is the missing content due to the over-/under-saturated regions caused by the moving objects, which may not be easily compensated&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.01628v1-abstract-full').style.display = 'inline'; document.getElementById('2007.01628v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2007.01628v1-abstract-full" style="display: none;"> Synthesizing high dynamic range (HDR) images from multiple low-dynamic range (LDR) exposures in dynamic scenes is challenging. There are two major problems caused by the large motions of foreground objects. One is the severe misalignment among the LDR images. The other is the missing content due to the over-/under-saturated regions caused by the moving objects, which may not be easily compensated for by the multiple LDR exposures. Thus, it requires the HDR generation model to be able to properly fuse the LDR images and restore the missing details without introducing artifacts. To address these two problems, we propose in this paper a novel GAN-based model, HDR-GAN, for synthesizing HDR images from multi-exposed LDR images. To our best knowledge, this work is the first GAN-based approach for fusing multi-exposed LDR images for HDR reconstruction. By incorporating adversarial learning, our method is able to produce faithful information in the regions with missing content. In addition, we also propose a novel generator network, with a reference-based residual merging block for aligning large object motions in the feature domain, and a deep HDR supervision scheme for eliminating artifacts of the reconstructed HDR images. Experimental results demonstrate that our model achieves state-of-the-art reconstruction performance over the prior HDR methods on diverse scenes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.01628v1-abstract-full').style.display = 'none'; document.getElementById('2007.01628v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 July, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2006.03568">arXiv:2006.03568</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2006.03568">pdf</a>, <a href="https://arxiv.org/format/2006.03568">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Graph Layer Security: Encrypting Information via Common Networked Physics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wei%2C+Z">Zhuangkun Wei</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+L">Liang Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+S+C">Schyler Chengyao Sun</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+B">Bin Li</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Weisi Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2006.03568v3-abstract-short" style="display: inline;"> The proliferation of low-cost Internet of Things (IoT) devices has led to a race between wireless security and channel attacks. Traditional cryptography requires high-computational power and is not suitable for low-power IoT scenarios. Whist, recently developed physical layer security (PLS) can exploit common wireless channel state information (CSI), its sensitivity to channel estimation makes the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.03568v3-abstract-full').style.display = 'inline'; document.getElementById('2006.03568v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2006.03568v3-abstract-full" style="display: none;"> The proliferation of low-cost Internet of Things (IoT) devices has led to a race between wireless security and channel attacks. Traditional cryptography requires high-computational power and is not suitable for low-power IoT scenarios. Whist, recently developed physical layer security (PLS) can exploit common wireless channel state information (CSI), its sensitivity to channel estimation makes them vulnerable from attacks. In this work, we exploit an alternative common physics shared between IoT transceivers: the monitored channel-irrelevant physical networked dynamics (e.g., water/oil/gas/electrical signal-flows). Leveraging this, we propose for the first time, graph layer security (GLS), by exploiting the dependency in physical dynamics among network nodes for information encryption and decryption. A graph Fourier transform (GFT) operator is used to characterize such dependency into a graph-bandlimted subspace, which allows the generations of channel-irrelevant cipher keys by maximizing the secrecy rate. We evaluate our GLS against designed active and passive attackers, using IEEE 39-Bus system. Results demonstrate that, GLS is not reliant on wireless CSI, and can combat attackers that have partial networked dynamic knowledge (realistic access to full dynamic and critical nodes remains challenging). We believe this novel GLS has widespread applicability in secure health monitoring and for Digital Twins in adversarial radio environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.03568v3-abstract-full').style.display = 'none'; document.getElementById('2006.03568v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 June, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2004.13198">arXiv:2004.13198</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2004.13198">pdf</a>, <a href="https://arxiv.org/ps/2004.13198">ps</a>, <a href="https://arxiv.org/format/2004.13198">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/JSYST.2020.3036129">10.1109/JSYST.2020.3036129 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Uncertainty of Resilience in Complex Networks with Nonlinear Dynamics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Moutsinas%2C+G">Giannis Moutsinas</a>, <a href="/search/eess?searchtype=author&amp;query=Zou%2C+M">Mengbang Zou</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Weisi Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2004.13198v1-abstract-short" style="display: inline;"> Resilience is a system&#39;s ability to maintain its function when perturbations and errors occur. Whilst we understand low-dimensional networked systems&#39; behavior well, our understanding of systems consisting of a large number of components is limited. Recent research in predicting the network level resilience pattern has advanced our understanding of the coupling relationship between global network&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.13198v1-abstract-full').style.display = 'inline'; document.getElementById('2004.13198v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2004.13198v1-abstract-full" style="display: none;"> Resilience is a system&#39;s ability to maintain its function when perturbations and errors occur. Whilst we understand low-dimensional networked systems&#39; behavior well, our understanding of systems consisting of a large number of components is limited. Recent research in predicting the network level resilience pattern has advanced our understanding of the coupling relationship between global network topology and local nonlinear component dynamics. However, when there is uncertainty in the model parameters, our understanding of how this translates to uncertainty in resilience is unclear for a large-scale networked system. Here we develop a polynomial chaos expansion method to estimate the resilience for a wide range of uncertainty distributions. By applying this method to case studies, we not only reveal the general resilience distribution with respect to the topology and dynamics sub-models, but also identify critical aspects to inform better monitoring to reduce uncertainty. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.13198v1-abstract-full').style.display = 'none'; document.getElementById('2004.13198v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 April, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8pages, 7figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2004.09615">arXiv:2004.09615</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2004.09615">pdf</a>, <a href="https://arxiv.org/format/2004.09615">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TSP.2020.3032408">10.1109/TSP.2020.3032408 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Sampling and Inference of Networked Dynamics using Log-Koopman Nonlinear Graph Fourier Transform </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wei%2C+Z">Zhuangkun Wei</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+B">Bin Li</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+C">Chengyao Sun</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Weisi Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2004.09615v2-abstract-short" style="display: inline;"> Networked nonlinear dynamics underpin the complex functionality of many engineering, social, biological, and ecological systems. Monitoring the networked dynamics via the minimum subset of nodes is essential for a variety of scientific and operational purposes. When there is a lack of a explicit model and networked signal space, traditional evolution analysis and non-convex methods are insufficien&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.09615v2-abstract-full').style.display = 'inline'; document.getElementById('2004.09615v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2004.09615v2-abstract-full" style="display: none;"> Networked nonlinear dynamics underpin the complex functionality of many engineering, social, biological, and ecological systems. Monitoring the networked dynamics via the minimum subset of nodes is essential for a variety of scientific and operational purposes. When there is a lack of a explicit model and networked signal space, traditional evolution analysis and non-convex methods are insufficient. An important data-driven state-of-the-art method use the Koopman operator to generate a linear evolution model for a vector-valued observable of original state-space. As a result, one can derive a sampling strategy via the linear evolution property of observable. However, current polynomial Koopman operators result in a large sampling space due to: (i) the large size of polynomial based observables ($O(N^2)$, $N$ number of nodes in network), and (ii) not factoring in the nonlinear dependency between observables. In this work, to achieve linear scaling ($O(N)$) and a small set of sampling nodes, we propose to combine a novel Log-Koopman operator and nonlinear Graph Fourier Transform (NL-GFT) scheme. First, the Log-Koopman operator is able to reduce the size of observables by transforming multiplicative poly-observable to logarithm summation. Second, a nonlinear GFT concept and sampling theory are provided to exploit the nonlinear dependence of observables for Koopman linearized evolution analysis. Combined, the sampling and reconstruction algorithms are designed and demonstrated on two established application areas. The results demonstrate that the proposed Log-Koopman NL-GFT scheme can (i) linearize unknown nonlinear dynamics using $O(N)$ observables, and (ii) achieve lower number of sampling nodes, compared with the state-of-the art polynomial Koopman linear evolution analysis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.09615v2-abstract-full').style.display = 'none'; document.getElementById('2004.09615v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 April, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2002.06049">arXiv:2002.06049</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2002.06049">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> An Adaptive X-vector Model for Text-independent Speaker Verification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Wu Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Dai%2C+L">Lirong Dai</a>, <a href="/search/eess?searchtype=author&amp;query=Du%2C+J">Jun Du</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2002.06049v1-abstract-short" style="display: inline;"> In this paper, adaptive mechanisms are applied in deep neural network (DNN) training for x-vector-based text-independent speaker verification. First, adaptive convolutional neural networks (ACNNs) are employed in frame-level embedding layers, where the parameters of the convolution filters are adjusted based on the input features. Compared with conventional CNNs, ACNNs have more flexibility in cap&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.06049v1-abstract-full').style.display = 'inline'; document.getElementById('2002.06049v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2002.06049v1-abstract-full" style="display: none;"> In this paper, adaptive mechanisms are applied in deep neural network (DNN) training for x-vector-based text-independent speaker verification. First, adaptive convolutional neural networks (ACNNs) are employed in frame-level embedding layers, where the parameters of the convolution filters are adjusted based on the input features. Compared with conventional CNNs, ACNNs have more flexibility in capturing speaker information. Moreover, we replace conventional batch normalization (BN) with adaptive batch normalization (ABN). By dynamically generating the scaling and shifting parameters in BN, ABN adapts models to the acoustic variability arising from various factors such as channel and environmental noises. Finally, we incorporate these two methods to further improve performance. Experiments are carried out on the speaker in the wild (SITW) and VOiCES databases. The results demonstrate that the proposed methods significantly outperform the original x-vector approach. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.06049v1-abstract-full').style.display = 'none'; document.getElementById('2002.06049v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2002.05508">arXiv:2002.05508</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2002.05508">pdf</a>, <a href="https://arxiv.org/format/2002.05508">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Physics and Society">physics.soc-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Neural Network Approximation of Graph Fourier Transforms for Sparse Sampling of Networked Flow Dynamics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Pagani%2C+A">Alessio Pagani</a>, <a href="/search/eess?searchtype=author&amp;query=Wei%2C+Z">Zhuangkun Wei</a>, <a href="/search/eess?searchtype=author&amp;query=Silva%2C+R">Ricardo Silva</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+W">Weisi Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2002.05508v1-abstract-short" style="display: inline;"> Infrastructure monitoring is critical for safe operations and sustainability. Water distribution networks (WDNs) are large-scale networked critical systems with complex cascade dynamics which are difficult to predict. Ubiquitous monitoring is expensive and a key challenge is to infer the contaminant dynamics from partial sparse monitoring data. Existing approaches use multi-objective optimisation&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.05508v1-abstract-full').style.display = 'inline'; document.getElementById('2002.05508v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2002.05508v1-abstract-full" style="display: none;"> Infrastructure monitoring is critical for safe operations and sustainability. Water distribution networks (WDNs) are large-scale networked critical systems with complex cascade dynamics which are difficult to predict. Ubiquitous monitoring is expensive and a key challenge is to infer the contaminant dynamics from partial sparse monitoring data. Existing approaches use multi-objective optimisation to find the minimum set of essential monitoring points, but lack performance guarantees and a theoretical framework. Here, we first develop Graph Fourier Transform (GFT) operators to compress networked contamination spreading dynamics to identify the essential principle data collection points with inference performance guarantees. We then build autoencoder (AE) inspired neural networks (NN) to generalize the GFT sampling process and under-sample further from the initial sampling set, allowing a very small set of data points to largely reconstruct the contamination dynamics over real and artificial WDNs. Various sources of the contamination are tested and we obtain high accuracy reconstruction using around 5-10% of the sample set. This general approach of compression and under-sampled recovery via neural networks can be applied to a wide range of networked infrastructures to enable digital twins. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.05508v1-abstract-full').style.display = 'none'; document.getElementById('2002.05508v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2020. </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Guo%2C+W&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Guo%2C+W&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Guo%2C+W&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10