CINXE.COM

Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 50 results for author: <span class="mathjax">Ding, W</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/eess" aria-role="search"> Searching in archive <strong>eess</strong>. <a href="/search/?searchtype=author&amp;query=Ding%2C+W">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Ding, W"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Ding%2C+W&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Ding, W"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Ding%2C+W&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Ding%2C+W&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Ding%2C+W&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.12811">arXiv:2410.12811</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.12811">pdf</a>, <a href="https://arxiv.org/format/2410.12811">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Decoding Emotions: Unveiling Facial Expressions through Acoustic Sensing with Contrastive Attention </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wang%2C+G">Guangjing Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+J">Juexing Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+C">Ce Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Weikang Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Zeng%2C+H">Huacheng Zeng</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+T">Tianxing Li</a>, <a href="/search/eess?searchtype=author&amp;query=Yan%2C+Q">Qiben Yan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.12811v1-abstract-short" style="display: inline;"> Expression recognition holds great promise for applications such as content recommendation and mental healthcare by accurately detecting users&#39; emotional states. Traditional methods often rely on cameras or wearable sensors, which raise privacy concerns and add extra device burdens. In addition, existing acoustic-based methods struggle to maintain satisfactory performance when there is a distribut&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12811v1-abstract-full').style.display = 'inline'; document.getElementById('2410.12811v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.12811v1-abstract-full" style="display: none;"> Expression recognition holds great promise for applications such as content recommendation and mental healthcare by accurately detecting users&#39; emotional states. Traditional methods often rely on cameras or wearable sensors, which raise privacy concerns and add extra device burdens. In addition, existing acoustic-based methods struggle to maintain satisfactory performance when there is a distribution shift between the training dataset and the inference dataset. In this paper, we introduce FacER+, an active acoustic facial expression recognition system, which eliminates the requirement for external microphone arrays. FacER+ extracts facial expression features by analyzing the echoes of near-ultrasound signals emitted between the 3D facial contour and the earpiece speaker on a smartphone. This approach not only reduces background noise but also enables the identification of different expressions from various users with minimal training data. We develop a contrastive external attention-based model to consistently learn expression features across different users, reducing the distribution differences. Extensive experiments involving 20 volunteers, both with and without masks, demonstrate that FacER+ can accurately recognize six common facial expressions with over 90% accuracy in diverse, user-independent real-life scenarios, surpassing the performance of the leading acoustic sensing methods by 10%. FacER+ offers a robust and practical solution for facial expression recognition. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12811v1-abstract-full').style.display = 'none'; document.getElementById('2410.12811v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The extended version of the 2023 IEEE INFOCOM conference paper</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.02070">arXiv:2409.02070</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.02070">pdf</a>, <a href="https://arxiv.org/format/2409.02070">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Explicit Differentiable Slicing and Global Deformation for Cardiac Mesh Reconstruction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Luo%2C+Y">Yihao Luo</a>, <a href="/search/eess?searchtype=author&amp;query=Sesia%2C+D">Dario Sesia</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+F">Fanwen Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+Y">Yinzhe Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wenhao Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+J">Jiahao Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+F">Fadong Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Shah%2C+A">Anoop Shah</a>, <a href="/search/eess?searchtype=author&amp;query=Kaural%2C+A">Amit Kaural</a>, <a href="/search/eess?searchtype=author&amp;query=Mayet%2C+J">Jamil Mayet</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guang Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Yap%2C+C">ChoonHwai Yap</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.02070v2-abstract-short" style="display: inline;"> Mesh reconstruction of the cardiac anatomy from medical images is useful for shape and motion measurements and biophysics simulations to facilitate the assessment of cardiac function and health. However, 3D medical images are often acquired as 2D slices that are sparsely sampled and noisy, and mesh reconstruction on such data is a challenging task. Traditional voxel-based approaches rely on pre- a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.02070v2-abstract-full').style.display = 'inline'; document.getElementById('2409.02070v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.02070v2-abstract-full" style="display: none;"> Mesh reconstruction of the cardiac anatomy from medical images is useful for shape and motion measurements and biophysics simulations to facilitate the assessment of cardiac function and health. However, 3D medical images are often acquired as 2D slices that are sparsely sampled and noisy, and mesh reconstruction on such data is a challenging task. Traditional voxel-based approaches rely on pre- and post-processing that compromises image fidelity, while mesh-level deep learning approaches require mesh annotations that are difficult to get. Therefore, direct cross-domain supervision from 2D images to meshes is a key technique for advancing 3D learning in medical imaging, but it has not been well-developed. While there have been attempts to approximate the optimized meshes&#39; slicing, few existing methods directly use 2D slices to supervise mesh reconstruction in a differentiable manner. Here, we propose a novel explicit differentiable voxelization and slicing (DVS) algorithm that allows gradient backpropagation to a mesh from its slices, facilitating refined mesh optimization directly supervised by the losses defined on 2D images. Further, we propose an innovative framework for extracting patient-specific left ventricle (LV) meshes from medical images by coupling DVS with a graph harmonic deformation (GHD) mesh morphing descriptor of cardiac shape that naturally preserves mesh quality and smoothness during optimization. Experimental results demonstrate that our method achieves state-of-the-art performance in cardiac mesh reconstruction tasks from CT and MRI, with an overall Dice score of 90% on multi-datasets, outperforming existing approaches. The proposed method can further quantify clinically useful parameters such as ejection fraction and global myocardial strains, closely matching the ground truth and surpassing the traditional voxel-based approach in sparse images. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.02070v2-abstract-full').style.display = 'none'; document.getElementById('2409.02070v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.07592">arXiv:2408.07592</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.07592">pdf</a>, <a href="https://arxiv.org/format/2408.07592">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Multi-periodicity dependency Transformer based on spectrum offset for radio frequency fingerprint identification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+J">Jing Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wenrui Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Shao%2C+Z">Zeqi Shao</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+D">Duona Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Ma%2C+Y">Yanan Ma</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Y">Yufeng Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+J">Jian Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.07592v1-abstract-short" style="display: inline;"> Radio Frequency Fingerprint Identification (RFFI) has emerged as a pivotal task for reliable device authentication. Despite advancements in RFFI methods, background noise and intentional modulation features result in weak energy and subtle differences in the RFF features. These challenges diminish the capability of RFFI methods in feature representation, complicating the effective identification o&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.07592v1-abstract-full').style.display = 'inline'; document.getElementById('2408.07592v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.07592v1-abstract-full" style="display: none;"> Radio Frequency Fingerprint Identification (RFFI) has emerged as a pivotal task for reliable device authentication. Despite advancements in RFFI methods, background noise and intentional modulation features result in weak energy and subtle differences in the RFF features. These challenges diminish the capability of RFFI methods in feature representation, complicating the effective identification of device identities. This paper proposes a novel Multi-Periodicity Dependency Transformer (MPDFormer) to address these challenges. The MPDFormer employs a spectrum offset-based periodic embedding representation to augment the discrepency of intrinsic features. We delve into the intricacies of the periodicity-dependency attention mechanism, integrating both inter-period and intra-period attention mechanisms. This mechanism facilitates the extraction of both long and short-range periodicity-dependency features , accentuating the feature distinction whilst concurrently attenuating the perturbations caused by background noise and weak-periodicity features. Empirical results demonstrate MPDFormer&#39;s superiority over established baseline methods, achieving a 0.07s inference time on NVIDIA Jetson Orin NX. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.07592v1-abstract-full').style.display = 'none'; document.getElementById('2408.07592v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.00248">arXiv:2408.00248</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.00248">pdf</a>, <a href="https://arxiv.org/format/2408.00248">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/JIOT.2024.3420774">10.1109/JIOT.2024.3420774 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Joint Vehicle Connection and Beamforming Optimization in Digital Twin Assisted Integrated Sensing and Communication Vehicular Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Weihang Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+Z">Zhaohui Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+M">Mingzhe Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+Y">Yuchen Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Shikh-Bahaei%2C+M">Mohammad Shikh-Bahaei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.00248v1-abstract-short" style="display: inline;"> This paper introduces an approach to harness digital twin (DT) technology in the realm of integrated sensing and communications (ISAC) in the sixth-generation (6G) Internet-of-everything (IoE) applications. We consider moving targets in a vehicular network and use DT to track and predict the motion of the vehicles. After predicting the location of the vehicle at the next time slot, the DT designs&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00248v1-abstract-full').style.display = 'inline'; document.getElementById('2408.00248v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.00248v1-abstract-full" style="display: none;"> This paper introduces an approach to harness digital twin (DT) technology in the realm of integrated sensing and communications (ISAC) in the sixth-generation (6G) Internet-of-everything (IoE) applications. We consider moving targets in a vehicular network and use DT to track and predict the motion of the vehicles. After predicting the location of the vehicle at the next time slot, the DT designs the assignment and beamforming for each vehicle. The real time sensing information is then utilized to update and refine the DT, enabling further processing and decision-making. This model incorporates a dynamic Kalman gain, which is updated at each time slot based on the received echo signals. The state representation encompasses both vehicle motion information and the error matrix, with the posterior Cram茅r-Rao bound (PCRB) employed to assess sensing accuracy. We consider a network with two roadside units (RSUs), and the vehicles need to be allocated to one of them. To optimize the overall transmission rate while maintaining an acceptable sensing accuracy, an optimization problem is formulated. Since it is generally hard to solve the original problem, Lagrange multipliers and fractional programming are employed to simplify this optimization problem. To solve the simplified problem, this paper introduces both greedy and heuristic algorithms through optimizing both vehicle assignments and predictive beamforming. The optimized results are then transferred back to the real space for ISAC applications. Recognizing the computational complexity of the greedy and heuristic algorithms, a bidirectional long short-term memory (LSTM)-based recurrent neural network (RNN) is proposed for efficient beamforming design within the DT. Simulation results demonstrate the effectiveness of the DT-based ISAC network. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00248v1-abstract-full').style.display = 'none'; document.getElementById('2408.00248v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Internet of Things Journal (2024) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.04368">arXiv:2407.04368</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.04368">pdf</a>, <a href="https://arxiv.org/format/2407.04368">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Romanization Encoding For Multilingual ASR </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wen Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Jia%2C+F">Fei Jia</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+H">Hainan Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Xi%2C+Y">Yu Xi</a>, <a href="/search/eess?searchtype=author&amp;query=Lai%2C+J">Junjie Lai</a>, <a href="/search/eess?searchtype=author&amp;query=Ginsburg%2C+B">Boris Ginsburg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.04368v1-abstract-short" style="display: inline;"> We introduce romanization encoding for script-heavy languages to optimize multilingual and code-switching Automatic Speech Recognition (ASR) systems. By adopting romanization encoding alongside a balanced concatenated tokenizer within a FastConformer-RNNT framework equipped with a Roman2Char module, we significantly reduce vocabulary and output dimensions, enabling larger training batches and redu&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.04368v1-abstract-full').style.display = 'inline'; document.getElementById('2407.04368v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.04368v1-abstract-full" style="display: none;"> We introduce romanization encoding for script-heavy languages to optimize multilingual and code-switching Automatic Speech Recognition (ASR) systems. By adopting romanization encoding alongside a balanced concatenated tokenizer within a FastConformer-RNNT framework equipped with a Roman2Char module, we significantly reduce vocabulary and output dimensions, enabling larger training batches and reduced memory consumption. Our method decouples acoustic modeling and language modeling, enhancing the flexibility and adaptability of the system. In our study, applying this method to Mandarin-English ASR resulted in a remarkable 63.51% vocabulary reduction and notable performance gains of 13.72% and 15.03% on SEAME code-switching benchmarks. Ablation studies on Mandarin-Korean and Mandarin-Japanese highlight our method&#39;s strong capability to address the complexities of other script-heavy languages, paving the way for more versatile and effective multilingual ASR systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.04368v1-abstract-full').style.display = 'none'; document.getElementById('2407.04368v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.04219">arXiv:2407.04219</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.04219">pdf</a>, <a href="https://arxiv.org/format/2407.04219">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Semi-supervised Learning for Code-Switching ASR with Large Language Model Filter </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xi%2C+Y">Yu Xi</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wen Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Yu%2C+K">Kai Yu</a>, <a href="/search/eess?searchtype=author&amp;query=Lai%2C+J">Junjie Lai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.04219v2-abstract-short" style="display: inline;"> Code-switching (CS) phenomenon occurs when words or phrases from different languages are alternated in a single sentence. Due to data scarcity, building an effective CS Automatic Speech Recognition (ASR) system remains challenging. In this paper, we propose to enhance CS-ASR systems by utilizing rich unsupervised monolingual speech data within a semi-supervised learning framework, particularly whe&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.04219v2-abstract-full').style.display = 'inline'; document.getElementById('2407.04219v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.04219v2-abstract-full" style="display: none;"> Code-switching (CS) phenomenon occurs when words or phrases from different languages are alternated in a single sentence. Due to data scarcity, building an effective CS Automatic Speech Recognition (ASR) system remains challenging. In this paper, we propose to enhance CS-ASR systems by utilizing rich unsupervised monolingual speech data within a semi-supervised learning framework, particularly when access to CS data is limited. To achieve this, we establish a general paradigm for applying noisy student training (NST) to the CS-ASR task. Specifically, we introduce the LLM-Filter, which leverages well-designed prompt templates to activate the correction capability of large language models (LLMs) for monolingual data selection and pseudo-labels refinement during NST. Our experiments on the supervised ASRU-CS and unsupervised AISHELL-2 and LibriSpeech datasets show that our method not only achieves significant improvements over supervised and semi-supervised learning baselines for the CS task, but also attains better performance compared with the fully-supervised oracle upper-bound on the CS English part. Additionally, we further investigate the influence of accent on AESRC dataset and demonstrate that our method can get achieve additional benefits when the monolingual data contains relevant linguistic characteristic. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.04219v2-abstract-full').style.display = 'none'; document.getElementById('2407.04219v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by SLT2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.05725">arXiv:2402.05725</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.05725">pdf</a>, <a href="https://arxiv.org/format/2402.05725">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Dual-modal Tactile E-skin: Enabling Bidirectional Human-Robot Interaction via Integrated Tactile Perception and Feedback </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Mu%2C+S">Shilong Mu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhao%2C+R">Runze Zhao</a>, <a href="/search/eess?searchtype=author&amp;query=Lin%2C+Z">Zenan Lin</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+Y">Yan Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+S">Shoujie Li</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+C">Chenchang Li</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+X">Xiao-Ping Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wenbo Ding</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.05725v1-abstract-short" style="display: inline;"> To foster an immersive and natural human-robot interaction, the implementation of tactile perception and feedback becomes imperative, effectively bridging the conventional sensory gap. In this paper, we propose a dual-modal electronic skin (e-skin) that integrates magnetic tactile sensing and vibration feedback for enhanced human-robot interaction. The dual-modal tactile e-skin offers multi-functi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.05725v1-abstract-full').style.display = 'inline'; document.getElementById('2402.05725v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.05725v1-abstract-full" style="display: none;"> To foster an immersive and natural human-robot interaction, the implementation of tactile perception and feedback becomes imperative, effectively bridging the conventional sensory gap. In this paper, we propose a dual-modal electronic skin (e-skin) that integrates magnetic tactile sensing and vibration feedback for enhanced human-robot interaction. The dual-modal tactile e-skin offers multi-functional tactile sensing and programmable haptic feedback, underpinned by a layered structure comprised of flexible magnetic films, soft silicone, a Hall sensor and actuator array, and a microcontroller unit. The e-skin captures the magnetic field changes caused by subtle deformations through Hall sensors, employing deep learning for accurate tactile perception. Simultaneously, the actuator array generates mechanical vibrations to facilitate haptic feedback, delivering diverse mechanical stimuli. Notably, the dual-modal e-skin is capable of transmitting tactile information bidirectionally, enabling object recognition and fine-weighing operations. This bidirectional tactile interaction framework will enhance the immersion and efficiency of interactions between humans and robots. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.05725v1-abstract-full').style.display = 'none'; document.getElementById('2402.05725v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 8 figures. Submitted to 2024 IEEE International Conference on Robotics and Automation (ICRA), Japan, Yokohama</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.05898">arXiv:2401.05898</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.05898">pdf</a>, <a href="https://arxiv.org/format/2401.05898">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> A Partial Compress-and-Forward Strategy for Relay-assisted Wireless Networks Based on Rateless Coding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Weihang Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Shikh-Bahaei%2C+M">Mohammad Shikh-Bahaei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.05898v1-abstract-short" style="display: inline;"> In this work, we propose a novel partial compress-and-forward (PCF) scheme for improving the maximum achievable transmission rate of a diamond relay network with two noisy relays. PCF combines conventional compress-and-forward (CF) and amplify-and-forward (AF) protocols, enabling one relay to operate alternately in the CF or the AF mode, while the other relay works purely in the CF mode. As the di&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.05898v1-abstract-full').style.display = 'inline'; document.getElementById('2401.05898v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.05898v1-abstract-full" style="display: none;"> In this work, we propose a novel partial compress-and-forward (PCF) scheme for improving the maximum achievable transmission rate of a diamond relay network with two noisy relays. PCF combines conventional compress-and-forward (CF) and amplify-and-forward (AF) protocols, enabling one relay to operate alternately in the CF or the AF mode, while the other relay works purely in the CF mode. As the direct link from the source to the destination is unavailable, and there is no noiseless relay in the diamond network, messages received from both relays must act as side information for each other and must be decoded jointly. We propose a joint decoder to decode two Luby transform (LT) codes received from both relays corresponding to the same original message. Numerical results show that PCF can achieve significant performance improvements compared to decode-and-forward (DF) and pure CF protocols when at least the channels connected to one of the relays are of high quality. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.05898v1-abstract-full').style.display = 'none'; document.getElementById('2401.05898v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.01573">arXiv:2312.01573</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2312.01573">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Survey on deep learning in multimodal medical imaging for cancer detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Tian%2C+Y">Yan Tian</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+Z">Zhaocheng Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Ma%2C+Y">Yujun Ma</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Weiping Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+R">Ruili Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Gao%2C+Z">Zhihong Gao</a>, <a href="/search/eess?searchtype=author&amp;query=Cheng%2C+G">Guohua Cheng</a>, <a href="/search/eess?searchtype=author&amp;query=He%2C+L">Linyang He</a>, <a href="/search/eess?searchtype=author&amp;query=Zhao%2C+X">Xuran Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.01573v1-abstract-short" style="display: inline;"> The task of multimodal cancer detection is to determine the locations and categories of lesions by using different imaging techniques, which is one of the key research methods for cancer diagnosis. Recently, deep learning-based object detection has made significant developments due to its strength in semantic feature extraction and nonlinear function fitting. However, multimodal cancer detection r&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.01573v1-abstract-full').style.display = 'inline'; document.getElementById('2312.01573v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.01573v1-abstract-full" style="display: none;"> The task of multimodal cancer detection is to determine the locations and categories of lesions by using different imaging techniques, which is one of the key research methods for cancer diagnosis. Recently, deep learning-based object detection has made significant developments due to its strength in semantic feature extraction and nonlinear function fitting. However, multimodal cancer detection remains challenging due to morphological differences in lesions, interpatient variability, difficulty in annotation, and imaging artifacts. In this survey, we mainly investigate over 150 papers in recent years with respect to multimodal cancer detection using deep learning, with a focus on datasets and solutions to various challenges such as data annotation, variance between classes, small-scale lesions, and occlusion. We also provide an overview of the advantages and drawbacks of each approach. Finally, we discuss the current scope of work and provide directions for the future development of multimodal cancer detection. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.01573v1-abstract-full').style.display = 'none'; document.getElementById('2312.01573v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Neural Computing and Applications. 2023 Nov 29:1-6 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.11951">arXiv:2307.11951</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2307.11951">pdf</a>, <a href="https://arxiv.org/format/2307.11951">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> A Simple and Efficient RSS-AOA Based Localization with Heterogeneous Anchor Nodes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Weizhong Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Chang%2C+S">Shengming Chang</a>, <a href="/search/eess?searchtype=author&amp;query=Bao%2C+S">Shudi Bao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.11951v1-abstract-short" style="display: inline;"> Accurate and reliable localization is crucial for various wireless communication applications. Numerous studies have proposed accurate localization methods using hybrid received signal strength (RSS) and angle of arrival (AOA) measurements. However, these studies typically assume identical measurement noise distributions for different anchor nodes, which may not accurately reflect real-world scena&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.11951v1-abstract-full').style.display = 'inline'; document.getElementById('2307.11951v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.11951v1-abstract-full" style="display: none;"> Accurate and reliable localization is crucial for various wireless communication applications. Numerous studies have proposed accurate localization methods using hybrid received signal strength (RSS) and angle of arrival (AOA) measurements. However, these studies typically assume identical measurement noise distributions for different anchor nodes, which may not accurately reflect real-world scenarios with varying noise distributions. In this paper, we propose a simple and efficient localization method based on hybrid RSS-AOA measurements that accounts for the varying measurement noises of different nodes. We derive a closed-form estimator for the target location based on the linear weighted least squares (LWLS) algorithm, with each LWLS equation weight being the inverse of its residual variance. Due to the unknown variances of LWLS equation residuals, we employ a two-stage LWLS method for estimation. The proposed method is computationally efficient, adaptable to different types of wireless communication systems and environments, and provides more accurate and reliable localization results compared to existing RSS-AOA localization techniques. Additionally, we derive the Cramer-Rao Lower Bound (CRLB) for the RSS-AOA signal sequences used in the proposed method. Simulation results demonstrate the superiority of the proposed method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.11951v1-abstract-full').style.display = 'none'; document.getElementById('2307.11951v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.11950">arXiv:2307.11950</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2307.11950">pdf</a>, <a href="https://arxiv.org/format/2307.11950">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Accurate RSS-Based Localization Using an Opposition-Based Learning Simulated Annealing Algorithm </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Weizhong Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Chang%2C+S">Shengming Chang</a>, <a href="/search/eess?searchtype=author&amp;query=Bao%2C+S">Shudi Bao</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+M">Meng Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+J">Jie Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.11950v1-abstract-short" style="display: inline;"> Wireless sensor networks require accurate target localization, often achieved through received signal strength (RSS) localization estimation based on maximum likelihood (ML). However, ML-based algorithms can suffer from issues such as low diversity, slow convergence, and local optima, which can significantly affect localization performance. In this paper, we propose a novel localization algorithm&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.11950v1-abstract-full').style.display = 'inline'; document.getElementById('2307.11950v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.11950v1-abstract-full" style="display: none;"> Wireless sensor networks require accurate target localization, often achieved through received signal strength (RSS) localization estimation based on maximum likelihood (ML). However, ML-based algorithms can suffer from issues such as low diversity, slow convergence, and local optima, which can significantly affect localization performance. In this paper, we propose a novel localization algorithm that combines opposition-based learning (OBL) and simulated annealing algorithm (SAA) to address these challenges. The algorithm begins by generating an initial solution randomly, which serves as the starting point for the SAA. Subsequently, OBL is employed to generate an opposing initial solution, effectively providing an alternative initial solution. The SAA is then executed independently on both the original and opposing initial solutions, optimizing each towards a potential optimal solution. The final solution is selected as the more effective of the two outcomes from the SAA, thereby reducing the likelihood of the algorithm becoming trapped in local optima. Simulation results indicate that the proposed algorithm consistently outperforms existing algorithms in terms of localization accuracy, demonstrating the effectiveness of our approach. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.11950v1-abstract-full').style.display = 'none'; document.getElementById('2307.11950v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.01979">arXiv:2307.01979</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2307.01979">pdf</a>, <a href="https://arxiv.org/format/2307.01979">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> ToothSegNet: Image Degradation meets Tooth Segmentation in CBCT Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Liu%2C+J">Jiaxiang Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Hu%2C+T">Tianxiang Hu</a>, <a href="/search/eess?searchtype=author&amp;query=Feng%2C+Y">Yang Feng</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wanghui Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+Z">Zuozhu Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.01979v1-abstract-short" style="display: inline;"> In computer-assisted orthodontics, three-dimensional tooth models are required for many medical treatments. Tooth segmentation from cone-beam computed tomography (CBCT) images is a crucial step in constructing the models. However, CBCT image quality problems such as metal artifacts and blurring caused by shooting equipment and patients&#39; dental conditions make the segmentation difficult. In this pa&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.01979v1-abstract-full').style.display = 'inline'; document.getElementById('2307.01979v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.01979v1-abstract-full" style="display: none;"> In computer-assisted orthodontics, three-dimensional tooth models are required for many medical treatments. Tooth segmentation from cone-beam computed tomography (CBCT) images is a crucial step in constructing the models. However, CBCT image quality problems such as metal artifacts and blurring caused by shooting equipment and patients&#39; dental conditions make the segmentation difficult. In this paper, we propose ToothSegNet, a new framework which acquaints the segmentation model with generated degraded images during training. ToothSegNet merges the information of high and low quality images from the designed degradation simulation module using channel-wise cross fusion to reduce the semantic gap between encoder and decoder, and also refines the shape of tooth prediction through a structural constraint loss. Experimental results suggest that ToothSegNet produces more precise segmentation and outperforms the state-of-the-art medical image segmentation methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.01979v1-abstract-full').style.display = 'none'; document.getElementById('2307.01979v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">IEEE ISBI 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.15161">arXiv:2306.15161</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.15161">pdf</a>, <a href="https://arxiv.org/format/2306.15161">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Wespeaker baselines for VoxSRC2023 </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wang%2C+S">Shuai Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Liang%2C+C">Chengdong Liang</a>, <a href="/search/eess?searchtype=author&amp;query=Xiang%2C+X">Xu Xiang</a>, <a href="/search/eess?searchtype=author&amp;query=Han%2C+B">Bing Han</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+Z">Zhengyang Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+H">Hongji Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wen Ding</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.15161v2-abstract-short" style="display: inline;"> This report showcases the results achieved using the wespeaker toolkit for the VoxSRC2023 Challenge. Our aim is to provide participants, especially those with limited experience, with clear and straightforward guidelines to develop their initial systems. Via well-structured recipes and strong results, we hope to offer an accessible and good enough start point for all interested individuals. In thi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.15161v2-abstract-full').style.display = 'inline'; document.getElementById('2306.15161v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.15161v2-abstract-full" style="display: none;"> This report showcases the results achieved using the wespeaker toolkit for the VoxSRC2023 Challenge. Our aim is to provide participants, especially those with limited experience, with clear and straightforward guidelines to develop their initial systems. Via well-structured recipes and strong results, we hope to offer an accessible and good enough start point for all interested individuals. In this report, we describe the results achieved on the VoxSRC2023 dev set using the pretrained models, you can check the CodaLab evaluation server for the results on the evaluation set. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.15161v2-abstract-full').style.display = 'none'; document.getElementById('2306.15161v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.02962">arXiv:2305.02962</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.02962">pdf</a>, <a href="https://arxiv.org/ps/2305.02962">ps</a>, <a href="https://arxiv.org/format/2305.02962">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> An Efficient Relay Selection Scheme for Relay-assisted HARQ </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Weihang Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Shikh-Bahaei%2C+M">Mohammad Shikh-Bahaei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.02962v1-abstract-short" style="display: inline;"> In wireless communication networks, relays are required when the quality of the direct link between the source and the destination is not high enough to support reliable transmission because of long distances or obstacles. Selecting the proper relay node (RN) to support hybrid automatic repeat request (HARQ) is of great importance in such a relay-assisted network. Different from previous works, wh&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.02962v1-abstract-full').style.display = 'inline'; document.getElementById('2305.02962v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.02962v1-abstract-full" style="display: none;"> In wireless communication networks, relays are required when the quality of the direct link between the source and the destination is not high enough to support reliable transmission because of long distances or obstacles. Selecting the proper relay node (RN) to support hybrid automatic repeat request (HARQ) is of great importance in such a relay-assisted network. Different from previous works, whether to participate in the transmission is determined by each RN itself in this work, thus reducing the overhead. As RNs do not need to obtain channel state information about the whole network, there is no significant overhead in the system. Using the numbers of transmission attempts required by both channels calculated from the obtained channel state information, each RN sets a timer and forwards the packet when time-out occurs. Simulation results show that our proposed method significantly improves the performance of the system. When the channels are of relatively high quality, the performance of our method is close to the optimal relay selection which requires full information about the network. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.02962v1-abstract-full').style.display = 'none'; document.getElementById('2305.02962v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.02948">arXiv:2305.02948</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.02948">pdf</a>, <a href="https://arxiv.org/ps/2305.02948">ps</a>, <a href="https://arxiv.org/format/2305.02948">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> HARQ Delay Minimization of 5G Wireless Network with Imperfect Feedback </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Weihang Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Shikh-Bahaei%2C+M">Mohammad Shikh-Bahaei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.02948v1-abstract-short" style="display: inline;"> 5G new radio (NR) technology is introduced to satisfy more demanding services. Ultra-Reliable Low Latency Communication (URLLC) requires very low delay compared with the previous techniques. This is hard to achieve when hybrid automatic repeat request (HARQ) is applied and especially when the feedback channel is erroneous. In this work, we consider various delay components in incremental redundanc&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.02948v1-abstract-full').style.display = 'inline'; document.getElementById('2305.02948v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.02948v1-abstract-full" style="display: none;"> 5G new radio (NR) technology is introduced to satisfy more demanding services. Ultra-Reliable Low Latency Communication (URLLC) requires very low delay compared with the previous techniques. This is hard to achieve when hybrid automatic repeat request (HARQ) is applied and especially when the feedback channel is erroneous. In this work, we consider various delay components in incremental redundancy (IR) HARQ systems and minimize the average delay by applying asymmetric feedback detection (AFD) and find the optimal transmission length for each transmission attempt. A M/G/1 queuing model is used in this work to analyze the queuing delay in 5G NR when there are multiple uses in the system. Numerical results show that significant performance gains and lower outage probability can be achieved by applying AFD. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.02948v1-abstract-full').style.display = 'none'; document.getElementById('2305.02948v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.03537">arXiv:2302.03537</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2302.03537">pdf</a>, <a href="https://arxiv.org/format/2302.03537">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Aligning Multi-Sequence CMR Towards Fully Automated Myocardial Pathology Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wangbin Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+L">Lei Li</a>, <a href="/search/eess?searchtype=author&amp;query=Qiu%2C+J">Junyi Qiu</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+S">Sihan Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+L">Liqin Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+Y">Yinyin Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+S">Shan Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhuang%2C+X">Xiahai Zhuang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.03537v1-abstract-short" style="display: inline;"> Myocardial pathology segmentation (MyoPS) is critical for the risk stratification and treatment planning of myocardial infarction (MI). Multi-sequence cardiac magnetic resonance (MS-CMR) images can provide valuable information. For instance, balanced steady-state free precession cine sequences present clear anatomical boundaries, while late gadolinium enhancement and T2-weighted CMR sequences visu&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.03537v1-abstract-full').style.display = 'inline'; document.getElementById('2302.03537v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.03537v1-abstract-full" style="display: none;"> Myocardial pathology segmentation (MyoPS) is critical for the risk stratification and treatment planning of myocardial infarction (MI). Multi-sequence cardiac magnetic resonance (MS-CMR) images can provide valuable information. For instance, balanced steady-state free precession cine sequences present clear anatomical boundaries, while late gadolinium enhancement and T2-weighted CMR sequences visualize myocardial scar and edema of MI, respectively. Existing methods usually fuse anatomical and pathological information from different CMR sequences for MyoPS, but assume that these images have been spatially aligned. However, MS-CMR images are usually unaligned due to the respiratory motions in clinical practices, which poses additional challenges for MyoPS. This work presents an automatic MyoPS framework for unaligned MS-CMR images. Specifically, we design a combined computing model for simultaneous image registration and information fusion, which aggregates multi-sequence features into a common space to extract anatomical structures (i.e., myocardium). Consequently, we can highlight the informative regions in the common space via the extracted myocardium to improve MyoPS performance, considering the spatial relationship between myocardial pathologies and myocardium. Experiments on a private MS-CMR dataset and a public dataset from the MYOPS2020 challenge show that our framework could achieve promising performance for fully automatic MyoPS. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.03537v1-abstract-full').style.display = 'none'; document.getElementById('2302.03537v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.00676">arXiv:2302.00676</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2302.00676">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optics">physics.optics</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applied Physics">physics.app-ph</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Light Extraction of Organic Light Emitting Diodes by Deep-Groove High-index Dielectric Nanomesh Using Large-area Nanoimprint </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Qi%2C+J">Ji Qi</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wei Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Q">Qi Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Y">Yuxuan Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+H">Hao Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Chou%2C+S+Y">Stephen Y. Chou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.00676v1-abstract-short" style="display: inline;"> To solve the conventional conflict between maintaining good charge transport property and achieving high light extraction efficiency when using micro/nanostructure patterned substrates to extract light from organic light emitting diodes (OLEDs), we developed a novel OLED structure, termed High-index Deep-Groove Dielectric Nanomesh OLED (HDNM-OLED), fabricated by large-area nanoimprint lithography&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.00676v1-abstract-full').style.display = 'inline'; document.getElementById('2302.00676v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.00676v1-abstract-full" style="display: none;"> To solve the conventional conflict between maintaining good charge transport property and achieving high light extraction efficiency when using micro/nanostructure patterned substrates to extract light from organic light emitting diodes (OLEDs), we developed a novel OLED structure, termed High-index Deep-Groove Dielectric Nanomesh OLED (HDNM-OLED), fabricated by large-area nanoimprint lithography (NIL). The key component is a nanostructure-patterned substrate embedded with a high-index deep-groove nanomesh and capped with a low-index planarization layer. The high-index and deep-groove nanomesh efficiently releases the tapped photons to achieve significantly enhanced light extraction. And the planarization layer helps to maintain the good charge transport property of the organic active layers deposited on top of it. For a green phosphorescent OLED in our demonstration, with the HDNM-OLED structure, compared to planar conventional ITO-OLED structure, the external quantum efficiency (EQE) was enhanced by 1.85-fold from 26% to 48% and power efficiency was enhanced by 1.86-fold from 42lm/W to 79lm/W. Besides green OELDs, the HDNM-OLED structure was also shown to be able to work for red and blue-emitting OELDs and achieved enhanced light extraction efficiency (1.58-fold for red light, 1.86-fold for blue light) without further structure modification, which demonstrated the light extraction enhancement by the HDNM-OLED is broadband. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.00676v1-abstract-full').style.display = 'none'; document.getElementById('2302.00676v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: text overlap with arXiv:2302.00044</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2301.10171">arXiv:2301.10171</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2301.10171">pdf</a>, <a href="https://arxiv.org/format/2301.10171">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Spectral Cross-Domain Neural Network with Soft-adaptive Threshold Spectral Enhancement </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Liu%2C+C">Che Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Cheng%2C+S">Sibo Cheng</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Weiping Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Arcucci%2C+R">Rossella Arcucci</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2301.10171v2-abstract-short" style="display: inline;"> Electrocardiography (ECG) signals can be considered as multi-variable time-series. The state-of-the-art ECG data classification approaches, based on either feature engineering or deep learning techniques, treat separately spectral and time domains in machine learning systems. No spectral-time domain communication mechanism inside the classifier model can be found in current approaches, leading to&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.10171v2-abstract-full').style.display = 'inline'; document.getElementById('2301.10171v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2301.10171v2-abstract-full" style="display: none;"> Electrocardiography (ECG) signals can be considered as multi-variable time-series. The state-of-the-art ECG data classification approaches, based on either feature engineering or deep learning techniques, treat separately spectral and time domains in machine learning systems. No spectral-time domain communication mechanism inside the classifier model can be found in current approaches, leading to difficulties in identifying complex ECG forms. In this paper, we proposed a novel deep learning model named Spectral Cross-domain neural network (SCDNN) with a new block called Soft-adaptive threshold spectral enhancement (SATSE), to simultaneously reveal the key information embedded in spectral and time domains inside the neural network. More precisely, the domain-cross information is captured by a general Convolutional neural network (CNN) backbone, and different information sources are merged by a self-adaptive mechanism to mine the connection between time and spectral domains. In SATSE, the knowledge from time and spectral domains is extracted via the Fast Fourier Transformation (FFT) with soft trainable thresholds in modified Sigmoid functions. The proposed SCDNN is tested with several classification tasks implemented on the public ECG databases \textit{PTB-XL} and \textit{MIT-BIH}. SCDNN outperforms the state-of-the-art approaches with a low computational cost regarding a variety of metrics in all classification tasks on both databases, by finding appropriate domains from the infinite spectral mapping. The convergence of the trainable thresholds in the spectral domain is also numerically investigated in this paper. The robust performance of SCDNN provides a new perspective to exploit knowledge across deep learning models from time and spectral domains. The repository can be found: https://github.com/DL-WG/SCDNN-TS <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.10171v2-abstract-full').style.display = 'none'; document.getElementById('2301.10171v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.04717">arXiv:2211.04717</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2211.04717">pdf</a>, <a href="https://arxiv.org/format/2211.04717">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Improving Noisy Student Training on Non-target Domain Data for Automatic Speech Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Chen%2C+Y">Yu Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wen Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Lai%2C+J">Junjie Lai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.04717v2-abstract-short" style="display: inline;"> Noisy Student Training (NST) has recently demonstrated extremely strong performance in Automatic Speech Recognition(ASR). In this paper, we propose a data selection strategy named LM Filter to improve the performance of NST on non-target domain data in ASR tasks. Hypotheses with and without a Language Model are generated and the CER differences between them are utilized as a filter threshold. Resu&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.04717v2-abstract-full').style.display = 'inline'; document.getElementById('2211.04717v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.04717v2-abstract-full" style="display: none;"> Noisy Student Training (NST) has recently demonstrated extremely strong performance in Automatic Speech Recognition(ASR). In this paper, we propose a data selection strategy named LM Filter to improve the performance of NST on non-target domain data in ASR tasks. Hypotheses with and without a Language Model are generated and the CER differences between them are utilized as a filter threshold. Results reveal that significant improvements of 10.4% compared with no data filtering baselines. We can achieve 3.31% CER in AISHELL-1 test set, which is best result from our knowledge without any other supervised data. We also perform evaluations on the supervised 1000 hour AISHELL-2 dataset and competitive results of 4.73% CER can be achieved. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.04717v2-abstract-full').style.display = 'none'; document.getElementById('2211.04717v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper is accepted by the ICASSP 2023 conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.12881">arXiv:2208.12881</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2208.12881">pdf</a>, <a href="https://arxiv.org/format/2208.12881">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Multi-Modality Cardiac Image Computing: A Survey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Li%2C+L">Lei Li</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wangbin Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+L">Liqun Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhuang%2C+X">Xiahai Zhuang</a>, <a href="/search/eess?searchtype=author&amp;query=Grau%2C+V">Vicente Grau</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.12881v1-abstract-short" style="display: inline;"> Multi-modality cardiac imaging plays a key role in the management of patients with cardiovascular diseases. It allows a combination of complementary anatomical, morphological and functional information, increases diagnosis accuracy, and improves the efficacy of cardiovascular interventions and clinical outcomes. Fully-automated processing and quantitative analysis of multi-modality cardiac images&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.12881v1-abstract-full').style.display = 'inline'; document.getElementById('2208.12881v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.12881v1-abstract-full" style="display: none;"> Multi-modality cardiac imaging plays a key role in the management of patients with cardiovascular diseases. It allows a combination of complementary anatomical, morphological and functional information, increases diagnosis accuracy, and improves the efficacy of cardiovascular interventions and clinical outcomes. Fully-automated processing and quantitative analysis of multi-modality cardiac images could have a direct impact on clinical research and evidence-based patient management. However, these require overcoming significant challenges including inter-modality misalignment and finding optimal methods to integrate information from different modalities. This paper aims to provide a comprehensive review of multi-modality imaging in cardiology, the computing methods, the validation strategies, the related clinical workflows and future perspectives. For the computing methodologies, we have a favored focus on the three tasks, i.e., registration, fusion and segmentation, which generally involve multi-modality imaging data, \textit{either combining information from different modalities or transferring information across modalities}. The review highlights that multi-modality cardiac imaging data has the potential of wide applicability in the clinic, such as trans-aortic valve implantation guidance, myocardial viability assessment, and catheter ablation therapy and its patient selection. Nevertheless, many challenges remain unsolved, such as missing modality, combination of imaging and non-imaging data, and uniform analysis and representation of different modalities. There is also work to do in defining how the well-developed techniques fit in clinical workflows and how much additional and relevant information they introduce. These problems are likely to continue to be an active field of research and the questions to be answered in the future. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.12881v1-abstract-full').style.display = 'none'; document.getElementById('2208.12881v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">30 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.04940">arXiv:2208.04940</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2208.04940">pdf</a>, <a href="https://arxiv.org/format/2208.04940">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Multi-Depth Boundary-Aware Left Atrial Scar Segmentation Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wu%2C+M">Mengjun Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wangbin Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+M">Mingjin Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+L">Liqin Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.04940v1-abstract-short" style="display: inline;"> Automatic segmentation of left atrial (LA) scars from late gadolinium enhanced CMR images is a crucial step for atrial fibrillation (AF) recurrence analysis. However, delineating LA scars is tedious and error-prone due to the variation of scar shapes. In this work, we propose a boundary-aware LA scar segmentation network, which is composed of two branches to segment LA and LA scars, respectively.&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.04940v1-abstract-full').style.display = 'inline'; document.getElementById('2208.04940v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.04940v1-abstract-full" style="display: none;"> Automatic segmentation of left atrial (LA) scars from late gadolinium enhanced CMR images is a crucial step for atrial fibrillation (AF) recurrence analysis. However, delineating LA scars is tedious and error-prone due to the variation of scar shapes. In this work, we propose a boundary-aware LA scar segmentation network, which is composed of two branches to segment LA and LA scars, respectively. We explore the inherent spatial relationship between LA and LA scars. By introducing a Sobel fusion module between the two segmentation branches, the spatial information of LA boundaries can be propagated from the LA branch to the scar branch. Thus, LA scar segmentation can be performed condition on the LA boundaries regions. In our experiments, 40 labeled images were used to train the proposed network, and the remaining 20 labeled images were used for evaluation. The network achieved an average Dice score of 0.608 for LA scar segmentation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.04940v1-abstract-full').style.display = 'none'; document.getElementById('2208.04940v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.04645">arXiv:2204.04645</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2204.04645">pdf</a>, <a href="https://arxiv.org/format/2204.04645">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Self-Supervised Audio-and-Text Pre-training with Extremely Low-Resource Parallel Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Kang%2C+Y">Yu Kang</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+T">Tianqiao Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+H">Hang Li</a>, <a href="/search/eess?searchtype=author&amp;query=Hao%2C+Y">Yang Hao</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wenbiao Ding</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.04645v1-abstract-short" style="display: inline;"> Multimodal pre-training for audio-and-text has recently been proved to be effective and has significantly improved the performance of many downstream speech understanding tasks. However, these state-of-the-art pre-training audio-text models work well only when provided with large amount of parallel audio-and-text data, which brings challenges on many languages that are rich in unimodal corpora but&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.04645v1-abstract-full').style.display = 'inline'; document.getElementById('2204.04645v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.04645v1-abstract-full" style="display: none;"> Multimodal pre-training for audio-and-text has recently been proved to be effective and has significantly improved the performance of many downstream speech understanding tasks. However, these state-of-the-art pre-training audio-text models work well only when provided with large amount of parallel audio-and-text data, which brings challenges on many languages that are rich in unimodal corpora but scarce of parallel cross-modal corpus. In this paper, we investigate whether it is possible to pre-train an audio-text multimodal model with extremely low-resource parallel data and extra non-parallel unimodal data. Our pre-training framework consists of the following components: (1) Intra-modal Denoising Auto-Encoding (IDAE), which is able to reconstruct input text (audio) representations from a noisy version of itself. (2) Cross-modal Denoising Auto-Encoding (CDAE), which is pre-trained to reconstruct the input text (audio), given both a noisy version of the input text (audio) and the corresponding translated noisy audio features (text embeddings). (3) Iterative Denoising Process (IDP), which iteratively translates raw audio (text) and the corresponding text embeddings (audio features) translated from previous iteration into the new less-noisy text embeddings (audio features). We adapt a dual cross-modal Transformer as our backbone model which consists of two unimodal encoders for IDAE and two cross-modal encoders for CDAE and IDP. Our method achieves comparable performance on multiple downstream speech understanding tasks compared with the model pre-trained on fully parallel data, demonstrating the great potential of the proposed method. Our code is available at: \url{https://github.com/KarlYuKang/Low-Resource-Multimodal-Pre-training}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.04645v1-abstract-full').style.display = 'none'; document.getElementById('2204.04645v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">AAAI 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.05784">arXiv:2203.05784</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2203.05784">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> AI-enabled Automatic Multimodal Fusion of Cone-Beam CT and Intraoral Scans for Intelligent 3D Tooth-Bone Reconstruction and Clinical Applications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Hao%2C+J">Jin Hao</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+J">Jiaxiang Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+J">Jin Li</a>, <a href="/search/eess?searchtype=author&amp;query=Pan%2C+W">Wei Pan</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+R">Ruizhe Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Xiong%2C+H">Huimin Xiong</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+K">Kaiwei Sun</a>, <a href="/search/eess?searchtype=author&amp;query=Lin%2C+H">Hangzheng Lin</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+W">Wanlu Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wanghui Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+J">Jianfei Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Hu%2C+H">Haoji Hu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Y">Yueling Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Feng%2C+Y">Yang Feng</a>, <a href="/search/eess?searchtype=author&amp;query=Zhao%2C+Z">Zeyu Zhao</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+H">Huikai Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Zheng%2C+Y">Youyi Zheng</a>, <a href="/search/eess?searchtype=author&amp;query=Fang%2C+B">Bing Fang</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+Z">Zuozhu Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhao%2C+Z">Zhihe Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.05784v1-abstract-short" style="display: inline;"> A critical step in virtual dental treatment planning is to accurately delineate all tooth-bone structures from CBCT with high fidelity and accurate anatomical information. Previous studies have established several methods for CBCT segmentation using deep learning. However, the inherent resolution discrepancy of CBCT and the loss of occlusal and dentition information largely limited its clinical ap&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.05784v1-abstract-full').style.display = 'inline'; document.getElementById('2203.05784v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.05784v1-abstract-full" style="display: none;"> A critical step in virtual dental treatment planning is to accurately delineate all tooth-bone structures from CBCT with high fidelity and accurate anatomical information. Previous studies have established several methods for CBCT segmentation using deep learning. However, the inherent resolution discrepancy of CBCT and the loss of occlusal and dentition information largely limited its clinical applicability. Here, we present a Deep Dental Multimodal Analysis (DDMA) framework consisting of a CBCT segmentation model, an intraoral scan (IOS) segmentation model (the most accurate digital dental model), and a fusion model to generate 3D fused crown-root-bone structures with high fidelity and accurate occlusal and dentition information. Our model was trained with a large-scale dataset with 503 CBCT and 28,559 IOS meshes manually annotated by experienced human experts. For CBCT segmentation, we use a five-fold cross validation test, each with 50 CBCT, and our model achieves an average Dice coefficient and IoU of 93.99% and 88.68%, respectively, significantly outperforming the baselines. For IOS segmentations, our model achieves an mIoU of 93.07% and 95.70% on the maxillary and mandible on a test set of 200 IOS meshes, which are 1.77% and 3.52% higher than the state-of-art method. Our DDMA framework takes about 20 to 25 minutes to generate the fused 3D mesh model following the sequential processing order, compared to over 5 hours by human experts. Notably, our framework has been incorporated into a software by a clear aligner manufacturer, and real-world clinical cases demonstrate that our model can visualize crown-root-bone structures during the entire orthodontic treatment and can predict risks like dehiscence and fenestration. These findings demonstrate the potential of multi-modal deep learning to improve the quality of digital dental models and help dentists make better clinical decisions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.05784v1-abstract-full').style.display = 'none'; document.getElementById('2203.05784v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">30 pages, 6 figures, 3 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.02000">arXiv:2202.02000</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2202.02000">pdf</a>, <a href="https://arxiv.org/format/2202.02000">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Cross-Modality Multi-Atlas Segmentation via Deep Registration and Label Fusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wangbin Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+L">Lei Li</a>, <a href="/search/eess?searchtype=author&amp;query=Zhuang%2C+X">Xiahai Zhuang</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+L">Liqin Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.02000v3-abstract-short" style="display: inline;"> Multi-atlas segmentation (MAS) is a promising framework for medical image segmentation. Generally, MAS methods register multiple atlases, i.e., medical images with corresponding labels, to a target image; and the transformed atlas labels can be combined to generate target segmentation via label fusion schemes. Many conventional MAS methods employed the atlases from the same modality as the target&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.02000v3-abstract-full').style.display = 'inline'; document.getElementById('2202.02000v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.02000v3-abstract-full" style="display: none;"> Multi-atlas segmentation (MAS) is a promising framework for medical image segmentation. Generally, MAS methods register multiple atlases, i.e., medical images with corresponding labels, to a target image; and the transformed atlas labels can be combined to generate target segmentation via label fusion schemes. Many conventional MAS methods employed the atlases from the same modality as the target image. However, the number of atlases with the same modality may be limited or even missing in many clinical applications. Besides, conventional MAS methods suffer from the computational burden of registration or label fusion procedures. In this work, we design a novel cross-modality MAS framework, which uses available atlases from a certain modality to segment a target image from another modality. To boost the computational efficiency of the framework, both the image registration and label fusion are achieved by well-designed deep neural networks. For the atlas-to-target image registration, we propose a bi-directional registration network (BiRegNet), which can efficiently align images from different modalities. For the label fusion, we design a similarity estimation network (SimNet), which estimates the fusion weight of each atlas by measuring its similarity to the target image. SimNet can learn multi-scale information for similarity estimation to improve the performance of label fusion. The proposed framework was evaluated by the left ventricle and liver segmentation tasks on the MM-WHS and CHAOS datasets, respectively. Results have shown that the framework is effective for cross-modality MAS in both registration and label fusion. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.02000v3-abstract-full').style.display = 'none'; document.getElementById('2202.02000v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.05758">arXiv:2112.05758</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2112.05758">pdf</a>, <a href="https://arxiv.org/format/2112.05758">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Edge-Enhanced Dual Discriminator Generative Adversarial Network for Fast MRI with Parallel Imaging Using Multi-view Information </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Huang%2C+J">Jiahao Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Weiping Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Lv%2C+J">Jun Lv</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+J">Jingwen Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Dong%2C+H">Hao Dong</a>, <a href="/search/eess?searchtype=author&amp;query=Del+Ser%2C+J">Javier Del Ser</a>, <a href="/search/eess?searchtype=author&amp;query=Xia%2C+J">Jun Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Ren%2C+T">Tiaojuan Ren</a>, <a href="/search/eess?searchtype=author&amp;query=Wong%2C+S">Stephen Wong</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.05758v1-abstract-short" style="display: inline;"> In clinical medicine, magnetic resonance imaging (MRI) is one of the most important tools for diagnosis, triage, prognosis, and treatment planning. However, MRI suffers from an inherent slow data acquisition process because data is collected sequentially in k-space. In recent years, most MRI reconstruction methods proposed in the literature focus on holistic image reconstruction rather than enhanc&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.05758v1-abstract-full').style.display = 'inline'; document.getElementById('2112.05758v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.05758v1-abstract-full" style="display: none;"> In clinical medicine, magnetic resonance imaging (MRI) is one of the most important tools for diagnosis, triage, prognosis, and treatment planning. However, MRI suffers from an inherent slow data acquisition process because data is collected sequentially in k-space. In recent years, most MRI reconstruction methods proposed in the literature focus on holistic image reconstruction rather than enhancing the edge information. This work steps aside this general trend by elaborating on the enhancement of edge information. Specifically, we introduce a novel parallel imaging coupled dual discriminator generative adversarial network (PIDD-GAN) for fast multi-channel MRI reconstruction by incorporating multi-view information. The dual discriminator design aims to improve the edge information in MRI reconstruction. One discriminator is used for holistic image reconstruction, whereas the other one is responsible for enhancing edge information. An improved U-Net with local and global residual learning is proposed for the generator. Frequency channel attention blocks (FCA Blocks) are embedded in the generator for incorporating attention mechanisms. Content loss is introduced to train the generator for better reconstruction quality. We performed comprehensive experiments on Calgary-Campinas public brain MR dataset and compared our method with state-of-the-art MRI reconstruction methods. Ablation studies of residual learning were conducted on the MICCAI13 dataset to validate the proposed modules. Results show that our PIDD-GAN provides high-quality reconstructed MR images, with well-preserved edge information. The time of single-image reconstruction is below 5ms, which meets the demand of faster processing. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.05758v1-abstract-full').style.display = 'none'; document.getElementById('2112.05758v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">33 pages, 13 figures, Applied Intelligence</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.04984">arXiv:2112.04984</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2112.04984">pdf</a>, <a href="https://arxiv.org/format/2112.04984">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Robust Weakly Supervised Learning for COVID-19 Recognition Using Multi-Center CT Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Ye%2C+Q">Qinghao Ye</a>, <a href="/search/eess?searchtype=author&amp;query=Gao%2C+Y">Yuan Gao</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Weiping Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Niu%2C+Z">Zhangming Niu</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+C">Chengjia Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+Y">Yinghui Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+M">Minhao Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Fang%2C+E+F">Evandro Fei Fang</a>, <a href="/search/eess?searchtype=author&amp;query=Menpes-Smith%2C+W">Wade Menpes-Smith</a>, <a href="/search/eess?searchtype=author&amp;query=Xia%2C+J">Jun Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.04984v1-abstract-short" style="display: inline;"> The world is currently experiencing an ongoing pandemic of an infectious disease named coronavirus disease 2019 (i.e., COVID-19), which is caused by the severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2). Computed Tomography (CT) plays an important role in assessing the severity of the infection and can also be used to identify those symptomatic and asymptomatic COVID-19 carriers. With a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.04984v1-abstract-full').style.display = 'inline'; document.getElementById('2112.04984v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.04984v1-abstract-full" style="display: none;"> The world is currently experiencing an ongoing pandemic of an infectious disease named coronavirus disease 2019 (i.e., COVID-19), which is caused by the severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2). Computed Tomography (CT) plays an important role in assessing the severity of the infection and can also be used to identify those symptomatic and asymptomatic COVID-19 carriers. With a surge of the cumulative number of COVID-19 patients, radiologists are increasingly stressed to examine the CT scans manually. Therefore, an automated 3D CT scan recognition tool is highly in demand since the manual analysis is time-consuming for radiologists and their fatigue can cause possible misjudgment. However, due to various technical specifications of CT scanners located in different hospitals, the appearance of CT images can be significantly different leading to the failure of many automated image recognition approaches. The multi-domain shift problem for the multi-center and multi-scanner studies is therefore nontrivial that is also crucial for a dependable recognition and critical for reproducible and objective diagnosis and prognosis. In this paper, we proposed a COVID-19 CT scan recognition model namely coronavirus information fusion and diagnosis network (CIFD-Net) that can efficiently handle the multi-domain shift problem via a new robust weakly supervised learning paradigm. Our model can resolve the problem of different appearance in CT scan images reliably and efficiently while attaining higher accuracy compared to other state-of-the-art methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.04984v1-abstract-full').style.display = 'none'; document.getElementById('2112.04984v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">32 pages, 8 figures, Applied Soft Computing</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2111.08154">arXiv:2111.08154</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2111.08154">pdf</a>, <a href="https://arxiv.org/format/2111.08154">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TSMC.2019.2917599">10.1109/TSMC.2019.2917599 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> On the utility of power spectral techniques with feature selection techniques for effective mental task classification in noninvasive BCI </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Gupta%2C+A">Akshansh Gupta</a>, <a href="/search/eess?searchtype=author&amp;query=Agrawal%2C+R+K">Ramesh Kumar Agrawal</a>, <a href="/search/eess?searchtype=author&amp;query=Kirar%2C+J+S">Jyoti Singh Kirar</a>, <a href="/search/eess?searchtype=author&amp;query=Andreu-Perez%2C+J">Javier Andreu-Perez</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wei-Ping Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Lin%2C+C">Chin-Teng Lin</a>, <a href="/search/eess?searchtype=author&amp;query=Prasad%2C+M">Mukesh Prasad</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2111.08154v1-abstract-short" style="display: inline;"> In this paper classification of mental task-root Brain-Computer Interfaces (BCI) is being investigated, as those are a dominant area of investigations in BCI and are of utmost interest as these systems can be augmented life of people having severe disabilities. The BCI model&#39;s performance is primarily dependent on the size of the feature vector, which is obtained through multiple channels. In the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.08154v1-abstract-full').style.display = 'inline'; document.getElementById('2111.08154v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2111.08154v1-abstract-full" style="display: none;"> In this paper classification of mental task-root Brain-Computer Interfaces (BCI) is being investigated, as those are a dominant area of investigations in BCI and are of utmost interest as these systems can be augmented life of people having severe disabilities. The BCI model&#39;s performance is primarily dependent on the size of the feature vector, which is obtained through multiple channels. In the case of mental task classification, the availability of training samples to features are minimal. Very often, feature selection is used to increase the ratio for the mental task classification by getting rid of irrelevant and superfluous features. This paper proposes an approach to select relevant and non-redundant spectral features for the mental task classification. This can be done by using four very known multivariate feature selection methods viz, Bhattacharya&#39;s Distance, Ratio of Scatter Matrices, Linear Regression and Minimum Redundancy &amp; Maximum Relevance. This work also deals with a comparative analysis of multivariate and univariate feature selection for mental task classification. After applying the above-stated method, the findings demonstrate substantial improvements in the performance of the learning model for mental task classification. Moreover, the efficacy of the proposed approach is endorsed by carrying out a robust ranking algorithm and Friedman&#39;s statistical test for finding the best combinations and comparing different combinations of power spectral density and feature selection methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.08154v1-abstract-full').style.display = 'none'; document.getElementById('2111.08154v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Systems, Man, and Cybernetics: Systems 51.5 (2019): 3080-3092 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.02171">arXiv:2109.02171</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2109.02171">pdf</a>, <a href="https://arxiv.org/format/2109.02171">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Right Ventricular Segmentation from Short- and Long-Axis MRIs via Information Transition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Li%2C+L">Lei Li</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wangbin Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+L">Liqun Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhuang%2C+X">Xiahai Zhuang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.02171v1-abstract-short" style="display: inline;"> Right ventricular (RV) segmentation from magnetic resonance imaging (MRI) is a crucial step for cardiac morphology and function analysis. However, automatic RV segmentation from MRI is still challenging, mainly due to the heterogeneous intensity, the complex variable shapes, and the unclear RV boundary. Moreover, current methods for the RV segmentation tend to suffer from performance degradation a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.02171v1-abstract-full').style.display = 'inline'; document.getElementById('2109.02171v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.02171v1-abstract-full" style="display: none;"> Right ventricular (RV) segmentation from magnetic resonance imaging (MRI) is a crucial step for cardiac morphology and function analysis. However, automatic RV segmentation from MRI is still challenging, mainly due to the heterogeneous intensity, the complex variable shapes, and the unclear RV boundary. Moreover, current methods for the RV segmentation tend to suffer from performance degradation at the basal and apical slices of MRI. In this work, we propose an automatic RV segmentation framework, where the information from long-axis (LA) views is utilized to assist the segmentation of short-axis (SA) views via information transition. Specifically, we employed the transformed segmentation from LA views as a prior information, to extract the ROI from SA views for better segmentation. The information transition aims to remove the surrounding ambiguous regions in the SA views. %, such as the tricuspid valve regions. We tested our model on a public dataset with 360 multi-center, multi-vendor and multi-disease subjects that consist of both LA and SA MRIs. Our experimental results show that including LA views can be effective to improve the accuracy of the SA segmentation. Our model is publicly available at https://github.com/NanYoMy/MMs-2. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.02171v1-abstract-full').style.display = 'none'; document.getElementById('2109.02171v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">None</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2107.07956">arXiv:2107.07956</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2107.07956">pdf</a>, <a href="https://arxiv.org/format/2107.07956">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> A Multimodal Machine Learning Framework for Teacher Vocal Delivery Evaluation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Li%2C+H">Hang Li</a>, <a href="/search/eess?searchtype=author&amp;query=Kang%2C+Y">Yu Kang</a>, <a href="/search/eess?searchtype=author&amp;query=Hao%2C+Y">Yang Hao</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wenbiao Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+Z">Zhongqin Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+Z">Zitao Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2107.07956v1-abstract-short" style="display: inline;"> The quality of vocal delivery is one of the key indicators for evaluating teacher enthusiasm, which has been widely accepted to be connected to the overall course qualities. However, existing evaluation for vocal delivery is mainly conducted with manual ratings, which faces two core challenges: subjectivity and time-consuming. In this paper, we present a novel machine learning approach that utiliz&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.07956v1-abstract-full').style.display = 'inline'; document.getElementById('2107.07956v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2107.07956v1-abstract-full" style="display: none;"> The quality of vocal delivery is one of the key indicators for evaluating teacher enthusiasm, which has been widely accepted to be connected to the overall course qualities. However, existing evaluation for vocal delivery is mainly conducted with manual ratings, which faces two core challenges: subjectivity and time-consuming. In this paper, we present a novel machine learning approach that utilizes pairwise comparisons and a multimodal orthogonal fusing algorithm to generate large-scale objective evaluation results of the teacher vocal delivery in terms of fluency and passion. We collect two datasets from real-world education scenarios and the experiment results demonstrate the effectiveness of our algorithm. To encourage reproducible results, we make our code public available at \url{https://github.com/tal-ai/ML4VocalDelivery.git}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.07956v1-abstract-full').style.display = 'none'; document.getElementById('2107.07956v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">AIED&#39;21: The 22nd International Conference on Artificial Intelligence in Education, 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2105.07392">arXiv:2105.07392</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2105.07392">pdf</a>, <a href="https://arxiv.org/format/2105.07392">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Unsupervised Multi-Modality Registration Network based on Spatially Encoded Gradient Information </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wangbin Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+L">Lei Li</a>, <a href="/search/eess?searchtype=author&amp;query=Zhuang%2C+X">Xiahai Zhuang</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+L">Liqin Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2105.07392v3-abstract-short" style="display: inline;"> Multi-modality medical images can provide relevant or complementary information for a target (organ, tumor or tissue). Registering multi-modality images to a common space can fuse these comprehensive information, and bring convenience for clinical application. Recently, neural networks have been widely investigated to boost registration methods. However, it is still challenging to develop a multi-&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.07392v3-abstract-full').style.display = 'inline'; document.getElementById('2105.07392v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2105.07392v3-abstract-full" style="display: none;"> Multi-modality medical images can provide relevant or complementary information for a target (organ, tumor or tissue). Registering multi-modality images to a common space can fuse these comprehensive information, and bring convenience for clinical application. Recently, neural networks have been widely investigated to boost registration methods. However, it is still challenging to develop a multi-modality registration network due to the lack of robust criteria for network training. In this work, we propose a multi-modality registration network (MMRegNet), which can perform registration between multi-modality images. Meanwhile, we present spatially encoded gradient information to train MMRegNet in an unsupervised manner. The proposed network was evaluated on MM-WHS 2017. Results show that MMRegNet can achieve promising performance for left ventricle cardiac registration tasks. Meanwhile, to demonstrate the versatility of MMRegNet, we further evaluate the method with a liver dataset from CHAOS 2019. Source code will be released publicly\footnote{https://github.com/NanYoMy/mmregnet} once the manuscript is accepted. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.07392v3-abstract-full').style.display = 'none'; document.getElementById('2105.07392v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 August, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 May, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2104.04006">arXiv:2104.04006</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2104.04006">pdf</a>, <a href="https://arxiv.org/format/2104.04006">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.compmedimag.2021.102008">10.1016/j.compmedimag.2021.102008 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> DenResCov-19: A deep transfer learning network for robust automatic classification of COVID-19, pneumonia, and tuberculosis from X-rays </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Mamalakis%2C+M">Michail Mamalakis</a>, <a href="/search/eess?searchtype=author&amp;query=Swift%2C+A+J">Andrew J. Swift</a>, <a href="/search/eess?searchtype=author&amp;query=Vorselaars%2C+B">Bart Vorselaars</a>, <a href="/search/eess?searchtype=author&amp;query=Ray%2C+S">Surajit Ray</a>, <a href="/search/eess?searchtype=author&amp;query=Weeks%2C+S">Simonne Weeks</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Weiping Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Clayton%2C+R+H">Richard H. Clayton</a>, <a href="/search/eess?searchtype=author&amp;query=Mackenzie%2C+L+S">Louise S. Mackenzie</a>, <a href="/search/eess?searchtype=author&amp;query=Banerjee%2C+A">Abhirup Banerjee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2104.04006v1-abstract-short" style="display: inline;"> The global pandemic of COVID-19 is continuing to have a significant effect on the well-being of global population, increasing the demand for rapid testing, diagnosis, and treatment. Along with COVID-19, other etiologies of pneumonia and tuberculosis constitute additional challenges to the medical system. In this regard, the objective of this work is to develop a new deep transfer learning pipeline&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.04006v1-abstract-full').style.display = 'inline'; document.getElementById('2104.04006v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2104.04006v1-abstract-full" style="display: none;"> The global pandemic of COVID-19 is continuing to have a significant effect on the well-being of global population, increasing the demand for rapid testing, diagnosis, and treatment. Along with COVID-19, other etiologies of pneumonia and tuberculosis constitute additional challenges to the medical system. In this regard, the objective of this work is to develop a new deep transfer learning pipeline to diagnose patients with COVID-19, pneumonia, and tuberculosis, based on chest x-ray images. We observed in some instances DenseNet and Resnet have orthogonal performances. In our proposed model, we have created an extra layer with convolutional neural network blocks to combine these two models to establish superior performance over either model. The same strategy can be useful in other applications where two competing networks with complementary performance are observed. We have tested the performance of our proposed network on two-class (pneumonia vs healthy), three-class (including COVID-19), and four-class (including tuberculosis) classification problems. The proposed network has been able to successfully classify these lung diseases in all four datasets and has provided significant improvement over the benchmark networks of DenseNet, ResNet, and Inception-V3. These novel findings can deliver a state-of-the-art pre-screening fast-track decision network to detect COVID-19 and other lung pathologies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.04006v1-abstract-full').style.display = 'none'; document.getElementById('2104.04006v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Report number:</span> 102008, 0895-6111 </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> 2021, Computerized Medical Imaging and Graphics </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2102.10919">arXiv:2102.10919</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2102.10919">pdf</a>, <a href="https://arxiv.org/ps/2102.10919">ps</a>, <a href="https://arxiv.org/format/2102.10919">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.cmpb.2021.106363">10.1016/j.cmpb.2021.106363 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Interpretative Computer-aided Lung Cancer Diagnosis: from Radiology Analysis to Malignancy Evaluation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zheng%2C+S">Shaohua Zheng</a>, <a href="/search/eess?searchtype=author&amp;query=Shen%2C+Z">Zhiqiang Shen</a>, <a href="/search/eess?searchtype=author&amp;query=Peia%2C+C">Chenhao Peia</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wangbin Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Lin%2C+H">Haojin Lin</a>, <a href="/search/eess?searchtype=author&amp;query=Zheng%2C+J">Jiepeng Zheng</a>, <a href="/search/eess?searchtype=author&amp;query=Pan%2C+L">Lin Pan</a>, <a href="/search/eess?searchtype=author&amp;query=Zheng%2C+B">Bin Zheng</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+L">Liqin Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2102.10919v1-abstract-short" style="display: inline;"> Background and Objective:Computer-aided diagnosis (CAD) systems promote diagnosis effectiveness and alleviate pressure of radiologists. A CAD system for lung cancer diagnosis includes nodule candidate detection and nodule malignancy evaluation. Recently, deep learning-based pulmonary nodule detection has reached satisfactory performance ready for clinical application. However, deep learning-based&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.10919v1-abstract-full').style.display = 'inline'; document.getElementById('2102.10919v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2102.10919v1-abstract-full" style="display: none;"> Background and Objective:Computer-aided diagnosis (CAD) systems promote diagnosis effectiveness and alleviate pressure of radiologists. A CAD system for lung cancer diagnosis includes nodule candidate detection and nodule malignancy evaluation. Recently, deep learning-based pulmonary nodule detection has reached satisfactory performance ready for clinical application. However, deep learning-based nodule malignancy evaluation depends on heuristic inference from low-dose computed tomography volume to malignant probability, which lacks clinical cognition. Methods:In this paper, we propose a joint radiology analysis and malignancy evaluation network (R2MNet) to evaluate the pulmonary nodule malignancy via radiology characteristics analysis. Radiological features are extracted as channel descriptor to highlight specific regions of the input volume that are critical for nodule malignancy evaluation. In addition, for model explanations, we propose channel-dependent activation mapping to visualize the features and shed light on the decision process of deep neural network. Results:Experimental results on the LIDC-IDRI dataset demonstrate that the proposed method achieved area under curve of 96.27% on nodule radiology analysis and AUC of 97.52% on nodule malignancy evaluation. In addition, explanations of CDAM features proved that the shape and density of nodule regions were two critical factors that influence a nodule to be inferred as malignant, which conforms with the diagnosis cognition of experienced radiologists. Conclusion:Incorporating radiology analysis with nodule malignant evaluation, the network inference process conforms to the diagnostic procedure of radiologists and increases the confidence of evaluation results. Besides, model interpretation with CDAM features shed light on the regions which DNNs focus on when they estimate nodule malignancy probabilities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.10919v1-abstract-full').style.display = 'none'; document.getElementById('2102.10919v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 February, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.15647">arXiv:2010.15647</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2010.15647">pdf</a>, <a href="https://arxiv.org/format/2010.15647">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Brain Tumor Segmentation Network Using Attention-based Fusion and Spatial Relationship Constraint </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Liu%2C+C">Chenyu Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wangbin Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+L">Lei Li</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Z">Zhen Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Pei%2C+C">Chenhao Pei</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+L">Liqin Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhuang%2C+X">Xiahai Zhuang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.15647v2-abstract-short" style="display: inline;"> Delineating the brain tumor from magnetic resonance (MR) images is critical for the treatment of gliomas. However, automatic delineation is challenging due to the complex appearance and ambiguous outlines of tumors. Considering that multi-modal MR images can reflect different tumor biological properties, we develop a novel multi-modal tumor segmentation network (MMTSN) to robustly segment brain tu&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.15647v2-abstract-full').style.display = 'inline'; document.getElementById('2010.15647v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.15647v2-abstract-full" style="display: none;"> Delineating the brain tumor from magnetic resonance (MR) images is critical for the treatment of gliomas. However, automatic delineation is challenging due to the complex appearance and ambiguous outlines of tumors. Considering that multi-modal MR images can reflect different tumor biological properties, we develop a novel multi-modal tumor segmentation network (MMTSN) to robustly segment brain tumors based on multi-modal MR images. The MMTSN is composed of three sub-branches and a main branch. Specifically, the sub-branches are used to capture different tumor features from multi-modal images, while in the main branch, we design a spatial-channel fusion block (SCFB) to effectively aggregate multi-modal features. Additionally, inspired by the fact that the spatial relationship between sub-regions of tumor is relatively fixed, e.g., the enhancing tumor is always in the tumor core, we propose a spatial loss to constrain the relationship between different sub-regions of tumor. We evaluate our method on the test set of multi-modal brain tumor segmentation challenge 2020 (BraTs2020). The method achieves 0.8764, 0.8243 and 0.773 dice score for whole tumor, tumor core and enhancing tumor, respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.15647v2-abstract-full').style.display = 'none'; document.getElementById('2010.15647v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.12733">arXiv:2010.12733</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2010.12733">pdf</a>, <a href="https://arxiv.org/format/2010.12733">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Learning Fine-Grained Cross Modality Excitement for Speech Emotion Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Li%2C+H">Hang Li</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wenbiao Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+Z">Zhongqin Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+Z">Zitao Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.12733v2-abstract-short" style="display: inline;"> Speech emotion recognition is a challenging task because the emotion expression is complex, multimodal and fine-grained. In this paper, we propose a novel multimodal deep learning approach to perform fine-grained emotion recognition from real-life speeches. We design a temporal alignment mean-max pooling mechanism to capture the subtle and fine-grained emotions implied in every utterance. In addit&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.12733v2-abstract-full').style.display = 'inline'; document.getElementById('2010.12733v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.12733v2-abstract-full" style="display: none;"> Speech emotion recognition is a challenging task because the emotion expression is complex, multimodal and fine-grained. In this paper, we propose a novel multimodal deep learning approach to perform fine-grained emotion recognition from real-life speeches. We design a temporal alignment mean-max pooling mechanism to capture the subtle and fine-grained emotions implied in every utterance. In addition, we propose a cross modality excitement module to conduct sample-specific adjustment on cross modality embeddings and adaptively recalibrate the corresponding values by its aligned latent features from the other modality. Our proposed model is evaluated on two well-known real-world speech emotion recognition datasets. The results demonstrate that our approach is superior on the prediction tasks for multimodal speech utterances, and it outperforms a wide range of baselines in terms of prediction accuracy. Further more, we conduct detailed ablation studies to show that our temporal alignment mean-max pooling mechanism and cross modality excitement significantly contribute to the promising results. In order to encourage the research reproducibility, we make the code publicly available at \url{https://github.com/tal-ai/FG_CME.git}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.12733v2-abstract-full').style.display = 'none'; document.getElementById('2010.12733v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The Interspeech Conference, 2021 (INTERSPEECH 2021)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2008.12205">arXiv:2008.12205</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2008.12205">pdf</a>, <a href="https://arxiv.org/format/2008.12205">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Random Style Transfer based Domain Generalization Networks Integrating Shape and Spatial Information </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Li%2C+L">Lei Li</a>, <a href="/search/eess?searchtype=author&amp;query=Zimmer%2C+V+A">Veronika A. Zimmer</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wangbin Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+F">Fuping Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+L">Liqin Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Schnabel%2C+J+A">Julia A. Schnabel</a>, <a href="/search/eess?searchtype=author&amp;query=Zhuang%2C+X">Xiahai Zhuang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2008.12205v2-abstract-short" style="display: inline;"> Deep learning (DL)-based models have demonstrated good performance in medical image segmentation. However, the models trained on a known dataset often fail when performed on an unseen dataset collected from different centers, vendors and disease populations. In this work, we present a random style transfer network to tackle the domain generalization problem for multi-vendor and center cardiac imag&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2008.12205v2-abstract-full').style.display = 'inline'; document.getElementById('2008.12205v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2008.12205v2-abstract-full" style="display: none;"> Deep learning (DL)-based models have demonstrated good performance in medical image segmentation. However, the models trained on a known dataset often fail when performed on an unseen dataset collected from different centers, vendors and disease populations. In this work, we present a random style transfer network to tackle the domain generalization problem for multi-vendor and center cardiac image segmentation. Style transfer is used to generate training data with a wider distribution/ heterogeneity, namely domain augmentation. As the target domain could be unknown, we randomly generate a modality vector for the target modality in the style transfer stage, to simulate the domain shift for unknown domains. The model can be trained in a semi-supervised manner by simultaneously optimizing a supervised segmentation and an unsupervised style translation objective. Besides, the framework incorporates the spatial information and shape prior of the target by introducing two regularization terms. We evaluated the proposed framework on 40 subjects from the M\&amp;Ms challenge2020, and obtained promising performance in the segmentation for data from unknown vendors and centers. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2008.12205v2-abstract-full').style.display = 'none'; document.getElementById('2008.12205v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 September, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 August, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2008.09388">arXiv:2008.09388</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2008.09388">pdf</a>, <a href="https://arxiv.org/format/2008.09388">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> CDE-GAN: Cooperative Dual Evolution Based Generative Adversarial Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Chen%2C+S">Shiming Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+W">Wenjie Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Xia%2C+B">Beihao Xia</a>, <a href="/search/eess?searchtype=author&amp;query=You%2C+X">Xinge You</a>, <a href="/search/eess?searchtype=author&amp;query=Cao%2C+Z">Zehong Cao</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Weiping Ding</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2008.09388v2-abstract-short" style="display: inline;"> Generative adversarial networks (GANs) have been a popular deep generative model for real-world applications. Despite many recent efforts on GANs that have been contributed, mode collapse and instability of GANs are still open problems caused by their adversarial optimization difficulties. In this paper, motivated by the cooperative co-evolutionary algorithm, we propose a Cooperative Dual Evolutio&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2008.09388v2-abstract-full').style.display = 'inline'; document.getElementById('2008.09388v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2008.09388v2-abstract-full" style="display: none;"> Generative adversarial networks (GANs) have been a popular deep generative model for real-world applications. Despite many recent efforts on GANs that have been contributed, mode collapse and instability of GANs are still open problems caused by their adversarial optimization difficulties. In this paper, motivated by the cooperative co-evolutionary algorithm, we propose a Cooperative Dual Evolution based Generative Adversarial Network (CDE-GAN) to circumvent these drawbacks. In essence, CDE-GAN incorporates dual evolution with respect to the generator(s) and discriminators into a unified evolutionary adversarial framework to conduct effective adversarial multi-objective optimization. Thus it exploits the complementary properties and injects dual mutation diversity into training to steadily diversify the estimated density in capturing multi-modes and improve generative performance. Specifically, CDE-GAN decomposes the complex adversarial optimization problem into two subproblems (generation and discrimination), and each subproblem is solved with a separated subpopulation (E-Generator} and E-Discriminators), evolved by its own evolutionary algorithm. Additionally, we further propose a Soft Mechanism to balance the trade-off between E-Generators and E-Discriminators to conduct steady training for CDE-GAN. Extensive experiments on one synthetic dataset and three real-world benchmark image datasets demonstrate that the proposed CDE-GAN achieves a competitive and superior performance in generating good quality and diverse samples over baselines. The code and more generated results are available at our project homepage: https://shiming-chen.github.io/CDE-GAN-website/CDE-GAN.html. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2008.09388v2-abstract-full').style.display = 'none'; document.getElementById('2008.09388v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 August, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages,6 figures,4 tables. Accepted by IEEE Transactions on Evolutionary Computation</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Evolutionary Computation, 2021 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2008.05780">arXiv:2008.05780</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2008.05780">pdf</a>, <a href="https://arxiv.org/format/2008.05780">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Multi-Modality Pathology Segmentation Framework: Application to Cardiac Magnetic Resonance Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Z">Zhen Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+C">Chenyu Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wangbin Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+S">Sihan Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Pei%2C+C">Chenhao Pei</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+M">Mingjing Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+L">Liqin Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2008.05780v1-abstract-short" style="display: inline;"> Multi-sequence of cardiac magnetic resonance (CMR) images can provide complementary information for myocardial pathology (scar and edema). However, it is still challenging to fuse these underlying information for pathology segmentation effectively. This work presents an automatic cascade pathology segmentation framework based on multi-modality CMR images. It mainly consists of two neural networks:&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2008.05780v1-abstract-full').style.display = 'inline'; document.getElementById('2008.05780v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2008.05780v1-abstract-full" style="display: none;"> Multi-sequence of cardiac magnetic resonance (CMR) images can provide complementary information for myocardial pathology (scar and edema). However, it is still challenging to fuse these underlying information for pathology segmentation effectively. This work presents an automatic cascade pathology segmentation framework based on multi-modality CMR images. It mainly consists of two neural networks: an anatomical structure segmentation network (ASSN) and a pathological region segmentation network (PRSN). Specifically, the ASSN aims to segment the anatomical structure where the pathology may exist, and it can provide a spatial prior for the pathological region segmentation. In addition, we integrate a denoising auto-encoder (DAE) into the ASSN to generate segmentation results with plausible shapes. The PRSN is designed to segment pathological region based on the result of ASSN, in which a fusion block based on channel attention is proposed to better aggregate multi-modality information from multi-modality CMR images. Experiments from the MyoPS2020 challenge dataset show that our framework can achieve promising performance for myocardial scar and edema segmentation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2008.05780v1-abstract-full').style.display = 'none'; document.getElementById('2008.05780v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 August, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages,MyoPS 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2005.07549">arXiv:2005.07549</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2005.07549">pdf</a>, <a href="https://arxiv.org/format/2005.07549">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Siamese Neural Networks for Class Activity Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Li%2C+H">Hang Li</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Z">Zhiwei Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Tang%2C+J">Jiliang Tang</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wenbiao Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+Z">Zitao Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2005.07549v1-abstract-short" style="display: inline;"> Classroom activity detection (CAD) aims at accurately recognizing speaker roles (either teacher or student) in classrooms. A CAD solution helps teachers get instant feedback on their pedagogical instructions. However, CAD is very challenging because (1) classroom conversations contain many conversational turn-taking overlaps between teachers and students; (2) the CAD model needs to be generalized&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.07549v1-abstract-full').style.display = 'inline'; document.getElementById('2005.07549v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2005.07549v1-abstract-full" style="display: none;"> Classroom activity detection (CAD) aims at accurately recognizing speaker roles (either teacher or student) in classrooms. A CAD solution helps teachers get instant feedback on their pedagogical instructions. However, CAD is very challenging because (1) classroom conversations contain many conversational turn-taking overlaps between teachers and students; (2) the CAD model needs to be generalized well enough for different teachers and students; and (3) classroom recordings may be very noisy and low-quality. In this work, we address the above challenges by building a Siamese neural framework to automatically identify teacher and student utterances from classroom recordings. The proposed model is evaluated on real-world educational datasets. The results demonstrate that (1) our approach is superior on the prediction tasks for both online and offline classroom environments; and (2) our framework exhibits robustness and generalization ability on new teachers (i.e., teachers never appear in training data). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.07549v1-abstract-full').style.display = 'none'; document.getElementById('2005.07549v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 May, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The 21th International Conference on Artificial Intelligence in Education(AIED), 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2005.02183">arXiv:2005.02183</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2005.02183">pdf</a>, <a href="https://arxiv.org/format/2005.02183">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Comparing SNNs and RNNs on Neuromorphic Vision Datasets: Similarities and Differences </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=He%2C+W">Weihua He</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+Y">YuJie Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Deng%2C+L">Lei Deng</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+G">Guoqi Li</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+H">Haoyu Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Tian%2C+Y">Yang Tian</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wei Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+W">Wenhui Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Xie%2C+Y">Yuan Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2005.02183v1-abstract-short" style="display: inline;"> Neuromorphic data, recording frameless spike events, have attracted considerable attention for the spatiotemporal information components and the event-driven processing fashion. Spiking neural networks (SNNs) represent a family of event-driven models with spatiotemporal dynamics for neuromorphic computing, which are widely benchmarked on neuromorphic data. Interestingly, researchers in the machine&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.02183v1-abstract-full').style.display = 'inline'; document.getElementById('2005.02183v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2005.02183v1-abstract-full" style="display: none;"> Neuromorphic data, recording frameless spike events, have attracted considerable attention for the spatiotemporal information components and the event-driven processing fashion. Spiking neural networks (SNNs) represent a family of event-driven models with spatiotemporal dynamics for neuromorphic computing, which are widely benchmarked on neuromorphic data. Interestingly, researchers in the machine learning community can argue that recurrent (artificial) neural networks (RNNs) also have the capability to extract spatiotemporal features although they are not event-driven. Thus, the question of &#34;what will happen if we benchmark these two kinds of models together on neuromorphic data&#34; comes out but remains unclear. In this work, we make a systematic study to compare SNNs and RNNs on neuromorphic data, taking the vision datasets as a case study. First, we identify the similarities and differences between SNNs and RNNs (including the vanilla RNNs and LSTM) from the modeling and learning perspectives. To improve comparability and fairness, we unify the supervised learning algorithm based on backpropagation through time (BPTT), the loss function exploiting the outputs at all timesteps, the network structure with stacked fully-connected or convolutional layers, and the hyper-parameters during training. Especially, given the mainstream loss function used in RNNs, we modify it inspired by the rate coding scheme to approach that of SNNs. Furthermore, we tune the temporal resolution of datasets to test model robustness and generalization. At last, a series of contrast experiments are conducted on two types of neuromorphic datasets: DVS-converted (N-MNIST) and DVS-captured (DVS Gesture). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.02183v1-abstract-full').style.display = 'none'; document.getElementById('2005.02183v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 May, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2002.03381">arXiv:2002.03381</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2002.03381">pdf</a>, <a href="https://arxiv.org/ps/2002.03381">ps</a>, <a href="https://arxiv.org/format/2002.03381">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Resource Allocation for UAV Assisted Wireless Networks with QoS Constraints </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Weihang Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+Z">Zhaohui Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+M">Mingzhe Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Hou%2C+J">Jiancao Hou</a>, <a href="/search/eess?searchtype=author&amp;query=Shikh-Bahaei%2C+M">Mohammad Shikh-Bahaei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2002.03381v1-abstract-short" style="display: inline;"> For crowded and hotspot area, unmanned aerial vehicles (UAVs) are usually deployed to increase the coverage rate. In the considered model, there are three types of services for UAV assisted communication: control message, non-realtime communication, and real-time communication, which can cover most of the actual demands of users in a UAV assisted communication system. A bandwidth allocation proble&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.03381v1-abstract-full').style.display = 'inline'; document.getElementById('2002.03381v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2002.03381v1-abstract-full" style="display: none;"> For crowded and hotspot area, unmanned aerial vehicles (UAVs) are usually deployed to increase the coverage rate. In the considered model, there are three types of services for UAV assisted communication: control message, non-realtime communication, and real-time communication, which can cover most of the actual demands of users in a UAV assisted communication system. A bandwidth allocation problem is considered to minimize the total energy consumption of this system while satisfying the requirements. Two techniques are introduced to enhance the performance of the system. The first method is to categorize the ground users into multiple user groups and offer each group a unique RF channel with different bandwidth. The second method is to deploy more than one UAVs in the system. Bandwidth optimization in each scheme is proved to be a convex problem. Simulation results show the superiority of the proposed schemes in terms of energy consumption. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.03381v1-abstract-full').style.display = 'none'; document.getElementById('2002.03381v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to IEEE WCNC 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2001.11539">arXiv:2001.11539</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2001.11539">pdf</a>, <a href="https://arxiv.org/format/2001.11539">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Adversarial Code Learning for Image Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Yuan%2C+J">Jiangbo Yuan</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+B">Bing Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wanying Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Ping%2C+Q">Qing Ping</a>, <a href="/search/eess?searchtype=author&amp;query=Yu%2C+Z">Zhendong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2001.11539v1-abstract-short" style="display: inline;"> We introduce the &#34;adversarial code learning&#34; (ACL) module that improves overall image generation performance to several types of deep models. Instead of performing a posterior distribution modeling in the pixel spaces of generators, ACLs aim to jointly learn a latent code with another image encoder/inference net, with a prior noise as its input. We conduct the learning in an adversarial learning p&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2001.11539v1-abstract-full').style.display = 'inline'; document.getElementById('2001.11539v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2001.11539v1-abstract-full" style="display: none;"> We introduce the &#34;adversarial code learning&#34; (ACL) module that improves overall image generation performance to several types of deep models. Instead of performing a posterior distribution modeling in the pixel spaces of generators, ACLs aim to jointly learn a latent code with another image encoder/inference net, with a prior noise as its input. We conduct the learning in an adversarial learning process, which bears a close resemblance to the original GAN but again shifts the learning from image spaces to prior and latent code spaces. ACL is a portable module that brings up much more flexibility and possibilities in generative model designs. First, it allows flexibility to convert non-generative models like Autoencoders and standard classification models to decent generative models. Second, it enhances existing GANs&#39; performance by generating meaningful codes and images from any part of the prior. We have incorporated our ACL module with the aforementioned frameworks and have performed experiments on synthetic, MNIST, CIFAR-10, and CelebA datasets. Our models have achieved significant improvements which demonstrated the generality for image generation tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2001.11539v1-abstract-full').style.display = 'none'; document.getElementById('2001.11539v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 January, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1912.03418">arXiv:1912.03418</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1912.03418">pdf</a>, <a href="https://arxiv.org/format/1912.03418">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Cascaded Deep Neural Networks for Retinal Layer Segmentation of Optical Coherence Tomography with Fluid Presence </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Lu%2C+D">Donghuan Lu</a>, <a href="/search/eess?searchtype=author&amp;query=Heisler%2C+M">Morgan Heisler</a>, <a href="/search/eess?searchtype=author&amp;query=Ma%2C+D">Da Ma</a>, <a href="/search/eess?searchtype=author&amp;query=Dabiri%2C+S">Setareh Dabiri</a>, <a href="/search/eess?searchtype=author&amp;query=Lee%2C+S">Sieun Lee</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+G+W">Gavin Weiguang Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Sarunic%2C+M+V">Marinko V. Sarunic</a>, <a href="/search/eess?searchtype=author&amp;query=Beg%2C+M+F">Mirza Faisal Beg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1912.03418v1-abstract-short" style="display: inline;"> Optical coherence tomography (OCT) is a non-invasive imaging technology which can provide micrometer-resolution cross-sectional images of the inner structures of the eye. It is widely used for the diagnosis of ophthalmic diseases with retinal alteration, such as layer deformation and fluid accumulation. In this paper, a novel framework was proposed to segment retinal layers with fluid presence. Th&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1912.03418v1-abstract-full').style.display = 'inline'; document.getElementById('1912.03418v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1912.03418v1-abstract-full" style="display: none;"> Optical coherence tomography (OCT) is a non-invasive imaging technology which can provide micrometer-resolution cross-sectional images of the inner structures of the eye. It is widely used for the diagnosis of ophthalmic diseases with retinal alteration, such as layer deformation and fluid accumulation. In this paper, a novel framework was proposed to segment retinal layers with fluid presence. The main contribution of this study is two folds: 1) we developed a cascaded network framework to incorporate the prior structural knowledge; 2) we proposed a novel deep neural network based on U-Net and fully convolutional network, termed LF-UNet. Cross validation experiments proved that the proposed LF-UNet has superior performance comparing with the state-of-the-art methods, and incorporating the relative distance map structural prior information could further improve the performance regardless the network. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1912.03418v1-abstract-full').style.display = 'none'; document.getElementById('1912.03418v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 December, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1911.06878">arXiv:1911.06878</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1911.06878">pdf</a>, <a href="https://arxiv.org/format/1911.06878">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Multi-scale Detection of Acoustic Events </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wenhao Ding</a>, <a href="/search/eess?searchtype=author&amp;query=He%2C+L">Liang He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1911.06878v2-abstract-short" style="display: inline;"> The goal of acoustic (or sound) events detection (AED or SED) is to predict the temporal position of target events in given audio segments. This task plays a significant role in safety monitoring, acoustic early warning and other scenarios. However, the deficiency of data and diversity of acoustic event sources make the AED task a tough issue, especially for prevalent data-driven methods. In this&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1911.06878v2-abstract-full').style.display = 'inline'; document.getElementById('1911.06878v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1911.06878v2-abstract-full" style="display: none;"> The goal of acoustic (or sound) events detection (AED or SED) is to predict the temporal position of target events in given audio segments. This task plays a significant role in safety monitoring, acoustic early warning and other scenarios. However, the deficiency of data and diversity of acoustic event sources make the AED task a tough issue, especially for prevalent data-driven methods. In this paper, we start by analyzing acoustic events according to their time-frequency domain properties, showing that different acoustic events have different time-frequency scale characteristics. Inspired by the analysis, we propose an adaptive multi-scale detection (AdaMD) method. By taking advantage of the hourglass neural network and gated recurrent unit (GRU) module, our AdaMD produces multiple predictions at different temporal and frequency resolutions. An adaptive training algorithm is subsequently adopted to combine multi-scale predictions to enhance its overall capability. Experimental results on Detection and Classification of Acoustic Scenes and Events 2017 (DCASE 2017) Task 2, DCASE 2016 Task 3 and DCASE 2017 Task 3 demonstrate that the AdaMD outperforms published state-of-the-art competitors in terms of the metrics of event error rate (ER) and F1-score. The verification experiment on our collected factory mechanical dataset also proves the noise-resistant capability of the AdaMD, providing the possibility for it to be deployed in the complex environment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1911.06878v2-abstract-full').style.display = 'none'; document.getElementById('1911.06878v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 November, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 November, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">IEEE/ACM TRANSACTIONS ON AUDIO, SPEECH, AND LANGUAGE PROCESSING (TASLP)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1910.13799">arXiv:1910.13799</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1910.13799">pdf</a>, <a href="https://arxiv.org/format/1910.13799">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Multimodal Learning For Classroom Activity Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Li%2C+H">Hang Li</a>, <a href="/search/eess?searchtype=author&amp;query=Kang%2C+Y">Yu Kang</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wenbiao Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+S">Song Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+S">Songfan Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+G+Y">Gale Yan Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+Z">Zitao Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1910.13799v3-abstract-short" style="display: inline;"> Classroom activity detection (CAD) focuses on accurately classifying whether the teacher or student is speaking and recording both the length of individual utterances during a class. A CAD solution helps teachers get instant feedback on their pedagogical instructions. This greatly improves educators&#39; teaching skills and hence leads to students&#39; achievement. However, CAD is very challenging because&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.13799v3-abstract-full').style.display = 'inline'; document.getElementById('1910.13799v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1910.13799v3-abstract-full" style="display: none;"> Classroom activity detection (CAD) focuses on accurately classifying whether the teacher or student is speaking and recording both the length of individual utterances during a class. A CAD solution helps teachers get instant feedback on their pedagogical instructions. This greatly improves educators&#39; teaching skills and hence leads to students&#39; achievement. However, CAD is very challenging because (1) the CAD model needs to be generalized well enough for different teachers and students; (2) data from both vocal and language modalities has to be wisely fused so that they can be complementary; and (3) the solution shouldn&#39;t heavily rely on additional recording device. In this paper, we address the above challenges by using a novel attention based neural framework. Our framework not only extracts both speech and language information, but utilizes attention mechanism to capture long-term semantic dependence. Our framework is device-free and is able to take any classroom recording as input. The proposed CAD learning framework is evaluated in two real-world education applications. The experimental results demonstrate the benefits of our approach on learning attention based neural network from classroom data with different modalities, and show our approach is able to outperform state-of-the-art baselines in terms of various evaluation metrics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.13799v3-abstract-full').style.display = 'none'; document.getElementById('1910.13799v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 October, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The 45th International Conference on Acoustics, Speech, and Signal Processing (ICASSP 2020)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1910.00099">arXiv:1910.00099</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1910.00099">pdf</a>, <a href="https://arxiv.org/format/1910.00099">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> CMTS: Conditional Multiple Trajectory Synthesizer for Generating Safety-critical Driving Scenarios </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wenhao Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+M">Mengdi Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhao%2C+D">Ding Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1910.00099v2-abstract-short" style="display: inline;"> Naturalistic driving trajectories are crucial for the performance of autonomous driving algorithms. However, most of the data is collected in safe scenarios leading to the duplication of trajectories which are easy to be handled by currently developed algorithms. When considering safety, testing algorithms in near-miss scenarios that rarely show up in off-the-shelf datasets is a vital part of the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.00099v2-abstract-full').style.display = 'inline'; document.getElementById('1910.00099v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1910.00099v2-abstract-full" style="display: none;"> Naturalistic driving trajectories are crucial for the performance of autonomous driving algorithms. However, most of the data is collected in safe scenarios leading to the duplication of trajectories which are easy to be handled by currently developed algorithms. When considering safety, testing algorithms in near-miss scenarios that rarely show up in off-the-shelf datasets is a vital part of the evaluation. As a remedy, we propose a near-miss data synthesizing framework based on Variational Bayesian methods and term it as Conditional Multiple Trajectory Synthesizer (CMTS). We leverage a generative model conditioned on road maps to bridge safe and collision driving data by representing their distribution in the latent space. By sampling from the near-miss distribution, we can synthesize safety-critical data crucial for understanding traffic scenarios but not shown in neither the original dataset nor the collision dataset. Our experimental results demonstrate that the augmented dataset covers more kinds of driving scenarios, especially the near-miss ones, which help improve the trajectory prediction accuracy and the capability of dealing with risky driving scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.00099v2-abstract-full').style.display = 'none'; document.getElementById('1910.00099v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 September, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to ICRA 2020, 8 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1904.00063">arXiv:1904.00063</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1904.00063">pdf</a>, <a href="https://arxiv.org/format/1904.00063">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Multi-Scale Time-Frequency Attention for Acoustic Event Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+J">Jingyang Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wenhao Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Kang%2C+J">Jintao Kang</a>, <a href="/search/eess?searchtype=author&amp;query=He%2C+L">Liang He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1904.00063v3-abstract-short" style="display: inline;"> Most attention-based methods only concentrate along the time axis, which is insufficient for Acoustic Event Detection (AED). Meanwhile, previous methods for AED rarely considered that target events possess distinct temporal and frequential scales. In this work, we propose a Multi-Scale Time-Frequency Attention (MTFA) module for AED. MTFA gathers information at multiple resolutions to generate a ti&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1904.00063v3-abstract-full').style.display = 'inline'; document.getElementById('1904.00063v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1904.00063v3-abstract-full" style="display: none;"> Most attention-based methods only concentrate along the time axis, which is insufficient for Acoustic Event Detection (AED). Meanwhile, previous methods for AED rarely considered that target events possess distinct temporal and frequential scales. In this work, we propose a Multi-Scale Time-Frequency Attention (MTFA) module for AED. MTFA gathers information at multiple resolutions to generate a time-frequency attention mask which tells the model where to focus along both time and frequency axis. With MTFA, the model could capture the characteristics of target events with different scales. We demonstrate the proposed method on Task 2 of Detection and Classification of Acoustic Scenes and Events (DCASE) 2017 Challenge. Our method achieves competitive results on both development dataset and evaluation dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1904.00063v3-abstract-full').style.display = 'none'; document.getElementById('1904.00063v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 September, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 March, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by Interspeech 2019</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1809.06671">arXiv:1809.06671</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1809.06671">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.neucom.2018.08.091">10.1016/j.neucom.2018.08.091 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Effects of Repetitive SSVEPs on EEG Complexity using Multiscale Inherent Fuzzy Entropy </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Cao%2C+Z">Zehong Cao</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Weiping Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Y">Yu-Kai Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Hussain%2C+F+K">Farookh Khadeer Hussain</a>, <a href="/search/eess?searchtype=author&amp;query=Al-Jumaily%2C+A">Adel Al-Jumaily</a>, <a href="/search/eess?searchtype=author&amp;query=Lin%2C+C">Chin-Teng Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1809.06671v2-abstract-short" style="display: inline;"> Multiscale inherent fuzzy entropy is an objective measurement of electroencephalography (EEG) complexity, reflecting the habituation of brain systems. Entropy dynamics are generally believed to reflect the ability of the brain to adapt to a visual stimulus environment. In this study, we explored repetitive steady-state visual evoked potential (SSVEP)-based EEG complexity by assessing multiscale in&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1809.06671v2-abstract-full').style.display = 'inline'; document.getElementById('1809.06671v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1809.06671v2-abstract-full" style="display: none;"> Multiscale inherent fuzzy entropy is an objective measurement of electroencephalography (EEG) complexity, reflecting the habituation of brain systems. Entropy dynamics are generally believed to reflect the ability of the brain to adapt to a visual stimulus environment. In this study, we explored repetitive steady-state visual evoked potential (SSVEP)-based EEG complexity by assessing multiscale inherent fuzzy entropy with relative measurements. We used a wearable EEG device with Oz and Fpz electrodes to collect EEG signals from 40 participants under the following three conditions: a resting state (closed-eyes (CE) and open- eyes (OE) stimulation with five 15-Hz CE SSVEPs and stimulation with five 20-Hz OE SSVEPs. We noted monotonic enhancement of occipital EEG relative complexity with increasing stimulus times in CE and OE conditions. The occipital EEG relative complexity was significantly higher for the fifth SSVEP than for the first SSEVP (FDR-adjusted p &lt; 0.05). Similarly, the prefrontal EEG relative complexity tended to be significantly higher in the OE condition compared to that in the CE condition (FDR-adjusted p &lt; 0.05). The results also indicate that multiscale inherent fuzzy entropy is superior to other competing multiscale-based entropy methods. In conclusion, EEG relative complexity increases with stimulus times, a finding that reflects the strong habituation of brain systems. These results suggest that multiscale inherent fuzzy entropy is an EEG pattern with which brain complexity can be assessed using repetitive SSVEP stimuli. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1809.06671v2-abstract-full').style.display = 'none'; document.getElementById('1809.06671v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 September, 2018; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 September, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The manuscript is pending publication in Neurocomputing</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Neurocomputing (Available online 8 May 2019) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1805.11446">arXiv:1805.11446</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1805.11446">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TBME.2018.2877651">10.1109/TBME.2018.2877651 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Identifying Ketamine Responses in Treatment-Resistant Depression Using a Wearable Forehead EEG </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Cao%2C+Z">Zehong Cao</a>, <a href="/search/eess?searchtype=author&amp;query=Lin%2C+C">Chin-Teng Lin</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Weiping Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+M">Mu-Hong Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+C">Cheng-Ta Li</a>, <a href="/search/eess?searchtype=author&amp;query=Su%2C+T">Tung-Ping Su</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1805.11446v3-abstract-short" style="display: inline;"> This study explores the responses to ketamine in patients with treatment-resistant depression (TRD) using a wearable forehead electroencephalography (EEG) device. We recruited fifty-five outpatients with TRD who were randomised into three approximately equal-sized groups (A: 0.5 mg/kg ketamine; B: 0.2 mg/kg ketamine; and C: normal saline) under double-blind conditions. The ketamine responses were&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1805.11446v3-abstract-full').style.display = 'inline'; document.getElementById('1805.11446v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1805.11446v3-abstract-full" style="display: none;"> This study explores the responses to ketamine in patients with treatment-resistant depression (TRD) using a wearable forehead electroencephalography (EEG) device. We recruited fifty-five outpatients with TRD who were randomised into three approximately equal-sized groups (A: 0.5 mg/kg ketamine; B: 0.2 mg/kg ketamine; and C: normal saline) under double-blind conditions. The ketamine responses were measured by EEG signals and Hamilton Depression Rating Scale (HDRS) scores. At baseline, responders showed a significantly weaker EEG theta power than did non- responders (p &lt; 0.05). Responders exhibited a higher EEG alpha power but lower EEG alpha asymmetry and theta cordance at post-treatment than at baseline (p &lt; 0.05). Furthermore, our baseline EEG predictor classified responders and non-responders with 81.3 +- 9.5% accuracy, 82.1 +- 8.6% sensitivity and 91.9 +- 7.4% specificity. In conclusion, the rapid antidepressant effects of mixed doses of ketamine are associated with prefrontal EEG power, asymmetry and cordance at baseline and early post-treatment changes. The prefrontal EEG patterns at baseline may account for recognising ketamine effects in advance. Our randomised, double- blind, placebo-controlled study provides information regarding clinical impacts on the potential targets underlying baseline identification and early changes from the effects of ketamine in patients with TRD. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1805.11446v3-abstract-full').style.display = 'none'; document.getElementById('1805.11446v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 September, 2018; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 May, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This revised article is submitting to IEEE TBME</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Biomedical Engineering (Page(s): 1668 - 1679, Volume: 66 , Issue: 6 , June 2019 ) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1805.08635">arXiv:1805.08635</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1805.08635">pdf</a>, <a href="https://arxiv.org/ps/1805.08635">ps</a>, <a href="https://arxiv.org/format/1805.08635">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Joint Configuration of Transmission Direction and Altitude in UAV-based Two-Way Communication </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Huang%2C+W">Wenqian Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Kim%2C+D+M">Dong Min Kim</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wenrui Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Popovski%2C+P">Petar Popovski</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1805.08635v1-abstract-short" style="display: inline;"> When considering unidirectional communication for unmanned aerial vehicles (UAVs) as flying Base Stations (BSs), either uplink or downlink, the system is limited through the co-channel interference that takes place over line-of-sight (LoS) links. This paper considers two-way communication and takes advantage of the fact that the interference among the ground devices takes place through non-line-of&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1805.08635v1-abstract-full').style.display = 'inline'; document.getElementById('1805.08635v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1805.08635v1-abstract-full" style="display: none;"> When considering unidirectional communication for unmanned aerial vehicles (UAVs) as flying Base Stations (BSs), either uplink or downlink, the system is limited through the co-channel interference that takes place over line-of-sight (LoS) links. This paper considers two-way communication and takes advantage of the fact that the interference among the ground devices takes place through non-line-of-sight (NLoS) links. UAVs can be deployed at the high altitudes to have larger coverage, while the two-way communication allows to configure the transmission direction. Using these two levers, we show how the system throughput can be maximized for a given deployment of the ground devices. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1805.08635v1-abstract-full').style.display = 'none'; document.getElementById('1805.08635v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 May, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 3 figures, Submitted for publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1803.09059">arXiv:1803.09059</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1803.09059">pdf</a>, <a href="https://arxiv.org/format/1803.09059">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> MTGAN: Speaker Verification through Multitasking Triplet Generative Adversarial Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wenhao Ding</a>, <a href="/search/eess?searchtype=author&amp;query=He%2C+L">Liang He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1803.09059v1-abstract-short" style="display: inline;"> In this paper, we propose an enhanced triplet method that improves the encoding process of embeddings by jointly utilizing generative adversarial mechanism and multitasking optimization. We extend our triplet encoder with Generative Adversarial Networks (GANs) and softmax loss function. GAN is introduced for increasing the generality and diversity of samples, while softmax is for reinforcing featu&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1803.09059v1-abstract-full').style.display = 'inline'; document.getElementById('1803.09059v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1803.09059v1-abstract-full" style="display: none;"> In this paper, we propose an enhanced triplet method that improves the encoding process of embeddings by jointly utilizing generative adversarial mechanism and multitasking optimization. We extend our triplet encoder with Generative Adversarial Networks (GANs) and softmax loss function. GAN is introduced for increasing the generality and diversity of samples, while softmax is for reinforcing features about speakers. For simplification, we term our method Multitasking Triplet Generative Adversarial Networks (MTGAN). Experiment on short utterances demonstrates that MTGAN reduces the verification equal error rate (EER) by 67% (relatively) and 32% (relatively) over conventional i-vector method and state-of-the-art triplet loss method respectively. This effectively indicates that MTGAN outperforms triplet methods in the aspect of expressing the high-level feature of speaker information. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1803.09059v1-abstract-full').style.display = 'none'; document.getElementById('1803.09059v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 March, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">submitted to Interspeech 2018</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Ding%2C+W&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Ding%2C+W&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Ding%2C+W&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10