Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–36 of 36 results for author: <span class="mathjax">Hussain, S</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/eess" aria-role="search"> Searching in archive <strong>eess</strong>. <a href="/search/?searchtype=author&query=Hussain%2C+S">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Hussain, S"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Hussain%2C+S&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Hussain, S"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05236">arXiv:2502.05236</a> <span> [<a href="https://arxiv.org/pdf/2502.05236">pdf</a>, <a href="https://arxiv.org/format/2502.05236">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Koel-TTS: Enhancing LLM based Speech Generation with Preference Alignment and Classifier Free Guidance </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hussain%2C+S">Shehzeen Hussain</a>, <a href="/search/eess?searchtype=author&query=Neekhara%2C+P">Paarth Neekhara</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+X">Xuesong Yang</a>, <a href="/search/eess?searchtype=author&query=Casanova%2C+E">Edresson Casanova</a>, <a href="/search/eess?searchtype=author&query=Ghosh%2C+S">Subhankar Ghosh</a>, <a href="/search/eess?searchtype=author&query=Desta%2C+M+T">Mikyas T. Desta</a>, <a href="/search/eess?searchtype=author&query=Fejgin%2C+R">Roy Fejgin</a>, <a href="/search/eess?searchtype=author&query=Valle%2C+R">Rafael Valle</a>, <a href="/search/eess?searchtype=author&query=Li%2C+J">Jason Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05236v1-abstract-short" style="display: inline;"> While autoregressive speech token generation models produce speech with remarkable variety and naturalness, their inherent lack of controllability often results in issues such as hallucinations and undesired vocalizations that do not conform to conditioning inputs. We introduce Koel-TTS, a suite of enhanced encoder-decoder Transformer TTS models that address these challenges by incorporating prefe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05236v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05236v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05236v1-abstract-full" style="display: none;"> While autoregressive speech token generation models produce speech with remarkable variety and naturalness, their inherent lack of controllability often results in issues such as hallucinations and undesired vocalizations that do not conform to conditioning inputs. We introduce Koel-TTS, a suite of enhanced encoder-decoder Transformer TTS models that address these challenges by incorporating preference alignment techniques guided by automatic speech recognition and speaker verification models. Additionally, we incorporate classifier-free guidance to further improve synthesis adherence to the transcript and reference speaker audio. Our experiments demonstrate that these optimizations significantly enhance target speaker similarity, intelligibility, and naturalness of synthesized speech. Notably, Koel-TTS directly maps text and context audio to acoustic tokens, and on the aforementioned metrics, outperforms state-of-the-art TTS models, despite being trained on a significantly smaller dataset. Audio samples and demos are available on our website. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05236v1-abstract-full').style.display = 'none'; document.getElementById('2502.05236v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04713">arXiv:2502.04713</a> <span> [<a href="https://arxiv.org/pdf/2502.04713">pdf</a>, <a href="https://arxiv.org/format/2502.04713">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Leveraging band diversity for feature selection in EO data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hussain%2C+S">Sadia Hussain</a>, <a href="/search/eess?searchtype=author&query=Lall%2C+B">Brejesh Lall</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04713v1-abstract-short" style="display: inline;"> Hyperspectral imaging (HSI) is a powerful earth observation technology that captures and processes information across a wide spectrum of wavelengths. Hyperspectral imaging provides comprehensive and detailed spectral data that is invaluable for a wide range of reconstruction problems. However due to complexity in analysis it often becomes difficult to handle this data. To address the challenge of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04713v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04713v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04713v1-abstract-full" style="display: none;"> Hyperspectral imaging (HSI) is a powerful earth observation technology that captures and processes information across a wide spectrum of wavelengths. Hyperspectral imaging provides comprehensive and detailed spectral data that is invaluable for a wide range of reconstruction problems. However due to complexity in analysis it often becomes difficult to handle this data. To address the challenge of handling large number of bands in reconstructing high quality HSI, we propose to form groups of bands. In this position paper we propose a method of selecting diverse bands using determinantal point processes in correlated bands. To address the issue of overlapping bands that may arise from grouping, we use spectral angle mapper analysis. This analysis can be fed to any Machine learning model to enable detailed analysis and monitoring with high precision and accuracy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04713v1-abstract-full').style.display = 'none'; document.getElementById('2502.04713v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.06660">arXiv:2412.06660</a> <span> [<a href="https://arxiv.org/pdf/2412.06660">pdf</a>, <a href="https://arxiv.org/format/2412.06660">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> MuMu-LLaMA: Multi-modal Music Understanding and Generation via Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Liu%2C+S">Shansong Liu</a>, <a href="/search/eess?searchtype=author&query=Hussain%2C+A+S">Atin Sakkeer Hussain</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Q">Qilong Wu</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+C">Chenshuo Sun</a>, <a href="/search/eess?searchtype=author&query=Shan%2C+Y">Ying Shan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.06660v1-abstract-short" style="display: inline;"> Research on large language models has advanced significantly across text, speech, images, and videos. However, multi-modal music understanding and generation remain underexplored due to the lack of well-annotated datasets. To address this, we introduce a dataset with 167.69 hours of multi-modal data, including text, images, videos, and music annotations. Based on this dataset, we propose MuMu-LLaM… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.06660v1-abstract-full').style.display = 'inline'; document.getElementById('2412.06660v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.06660v1-abstract-full" style="display: none;"> Research on large language models has advanced significantly across text, speech, images, and videos. However, multi-modal music understanding and generation remain underexplored due to the lack of well-annotated datasets. To address this, we introduce a dataset with 167.69 hours of multi-modal data, including text, images, videos, and music annotations. Based on this dataset, we propose MuMu-LLaMA, a model that leverages pre-trained encoders for music, images, and videos. For music generation, we integrate AudioLDM 2 and MusicGen. Our evaluation across four tasks--music understanding, text-to-music generation, prompt-based music editing, and multi-modal music generation--demonstrates that MuMu-LLaMA outperforms state-of-the-art models, showing its potential for multi-modal music applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.06660v1-abstract-full').style.display = 'none'; document.getElementById('2412.06660v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.10146">arXiv:2410.10146</a> <span> [<a href="https://arxiv.org/pdf/2410.10146">pdf</a>, <a href="https://arxiv.org/format/2410.10146">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/978-3-031-73376-5_6">10.1007/978-3-031-73376-5_6 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Performance Evaluation of Deep Learning and Transformer Models Using Multimodal Data for Breast Cancer Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hussain%2C+S">Sadam Hussain</a>, <a href="/search/eess?searchtype=author&query=Ali%2C+M">Mansoor Ali</a>, <a href="/search/eess?searchtype=author&query=Naseem%2C+U">Usman Naseem</a>, <a href="/search/eess?searchtype=author&query=Palomo%2C+B+A+B">Beatriz Alejandra Bosques Palomo</a>, <a href="/search/eess?searchtype=author&query=Molina%2C+M+A+M">Mario Alexis Monsivais Molina</a>, <a href="/search/eess?searchtype=author&query=Abdala%2C+J+A+G">Jorge Alberto Garza Abdala</a>, <a href="/search/eess?searchtype=author&query=Avalos%2C+D+B+A">Daly Betzabeth Avendano Avalos</a>, <a href="/search/eess?searchtype=author&query=Cardona-Huerta%2C+S">Servando Cardona-Huerta</a>, <a href="/search/eess?searchtype=author&query=Gulliver%2C+T+A">T. Aaron Gulliver</a>, <a href="/search/eess?searchtype=author&query=Pena%2C+J+G+T">Jose Gerardo Tamez Pena</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.10146v1-abstract-short" style="display: inline;"> Rising breast cancer (BC) occurrence and mortality are major global concerns for women. Deep learning (DL) has demonstrated superior diagnostic performance in BC classification compared to human expert readers. However, the predominant use of unimodal (digital mammography) features may limit the current performance of diagnostic models. To address this, we collected a novel multimodal dataset comp… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10146v1-abstract-full').style.display = 'inline'; document.getElementById('2410.10146v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.10146v1-abstract-full" style="display: none;"> Rising breast cancer (BC) occurrence and mortality are major global concerns for women. Deep learning (DL) has demonstrated superior diagnostic performance in BC classification compared to human expert readers. However, the predominant use of unimodal (digital mammography) features may limit the current performance of diagnostic models. To address this, we collected a novel multimodal dataset comprising both imaging and textual data. This study proposes a multimodal DL architecture for BC classification, utilising images (mammograms; four views) and textual data (radiological reports) from our new in-house dataset. Various augmentation techniques were applied to enhance the training data size for both imaging and textual data. We explored the performance of eleven SOTA DL architectures (VGG16, VGG19, ResNet34, ResNet50, MobileNet-v3, EffNet-b0, EffNet-b1, EffNet-b2, EffNet-b3, EffNet-b7, and Vision Transformer (ViT)) as imaging feature extractors. For textual feature extraction, we utilised either artificial neural networks (ANNs) or long short-term memory (LSTM) networks. The combined imaging and textual features were then inputted into an ANN classifier for BC classification, using the late fusion technique. We evaluated different feature extractor and classifier arrangements. The VGG19 and ANN combinations achieved the highest accuracy of 0.951. For precision, the VGG19 and ANN combination again surpassed other CNN and LSTM, ANN based architectures by achieving a score of 0.95. The best sensitivity score of 0.903 was achieved by the VGG16+LSTM. The highest F1 score of 0.931 was achieved by VGG19+LSTM. Only the VGG16+LSTM achieved the best area under the curve (AUC) of 0.937, with VGG16+LSTM closely following with a 0.929 AUC score. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10146v1-abstract-full').style.display = 'none'; document.getElementById('2410.10146v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The paper was accepted and presented in 3rd Workshop on Cancer Prevention, detection, and intervenTion (CaPTion @ MICCAI 2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.12117">arXiv:2409.12117</a> <span> [<a href="https://arxiv.org/pdf/2409.12117">pdf</a>, <a href="https://arxiv.org/ps/2409.12117">ps</a>, <a href="https://arxiv.org/format/2409.12117">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Low Frame-rate Speech Codec: a Codec Designed for Fast High-quality Speech LLM Training and Inference </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Casanova%2C+E">Edresson Casanova</a>, <a href="/search/eess?searchtype=author&query=Langman%2C+R">Ryan Langman</a>, <a href="/search/eess?searchtype=author&query=Neekhara%2C+P">Paarth Neekhara</a>, <a href="/search/eess?searchtype=author&query=Hussain%2C+S">Shehzeen Hussain</a>, <a href="/search/eess?searchtype=author&query=Li%2C+J">Jason Li</a>, <a href="/search/eess?searchtype=author&query=Ghosh%2C+S">Subhankar Ghosh</a>, <a href="/search/eess?searchtype=author&query=Juki%C4%87%2C+A">Ante Juki膰</a>, <a href="/search/eess?searchtype=author&query=Lee%2C+S">Sang-gil Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.12117v1-abstract-short" style="display: inline;"> Large language models (LLMs) have significantly advanced audio processing through audio codecs that convert audio into discrete tokens, enabling the application of language modeling techniques to audio data. However, audio codecs often operate at high frame rates, resulting in slow training and inference, especially for autoregressive models. To address this challenge, we present the Low Frame-rat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.12117v1-abstract-full').style.display = 'inline'; document.getElementById('2409.12117v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.12117v1-abstract-full" style="display: none;"> Large language models (LLMs) have significantly advanced audio processing through audio codecs that convert audio into discrete tokens, enabling the application of language modeling techniques to audio data. However, audio codecs often operate at high frame rates, resulting in slow training and inference, especially for autoregressive models. To address this challenge, we present the Low Frame-rate Speech Codec (LFSC): a neural audio codec that leverages finite scalar quantization and adversarial training with large speech language models to achieve high-quality audio compression with a 1.89 kbps bitrate and 21.5 frames per second. We demonstrate that our novel codec can make the inference of LLM-based text-to-speech models around three times faster while improving intelligibility and producing quality comparable to previous models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.12117v1-abstract-full').style.display = 'none'; document.getElementById('2409.12117v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to ICASSP 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.17957">arXiv:2406.17957</a> <span> [<a href="https://arxiv.org/pdf/2406.17957">pdf</a>, <a href="https://arxiv.org/format/2406.17957">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Improving Robustness of LLM-based Speech Synthesis by Learning Monotonic Alignment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Neekhara%2C+P">Paarth Neekhara</a>, <a href="/search/eess?searchtype=author&query=Hussain%2C+S">Shehzeen Hussain</a>, <a href="/search/eess?searchtype=author&query=Ghosh%2C+S">Subhankar Ghosh</a>, <a href="/search/eess?searchtype=author&query=Li%2C+J">Jason Li</a>, <a href="/search/eess?searchtype=author&query=Valle%2C+R">Rafael Valle</a>, <a href="/search/eess?searchtype=author&query=Badlani%2C+R">Rohan Badlani</a>, <a href="/search/eess?searchtype=author&query=Ginsburg%2C+B">Boris Ginsburg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.17957v1-abstract-short" style="display: inline;"> Large Language Model (LLM) based text-to-speech (TTS) systems have demonstrated remarkable capabilities in handling large speech datasets and generating natural speech for new speakers. However, LLM-based TTS models are not robust as the generated output can contain repeating words, missing words and mis-aligned speech (referred to as hallucinations or attention errors), especially when the text c… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.17957v1-abstract-full').style.display = 'inline'; document.getElementById('2406.17957v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.17957v1-abstract-full" style="display: none;"> Large Language Model (LLM) based text-to-speech (TTS) systems have demonstrated remarkable capabilities in handling large speech datasets and generating natural speech for new speakers. However, LLM-based TTS models are not robust as the generated output can contain repeating words, missing words and mis-aligned speech (referred to as hallucinations or attention errors), especially when the text contains multiple occurrences of the same token. We examine these challenges in an encoder-decoder transformer model and find that certain cross-attention heads in such models implicitly learn the text and speech alignment when trained for predicting speech tokens for a given text. To make the alignment more robust, we propose techniques utilizing CTC loss and attention priors that encourage monotonic cross-attention over the text tokens. Our guided attention training technique does not introduce any new learnable parameters and significantly improves robustness of LLM-based TTS models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.17957v1-abstract-full').style.display = 'none'; document.getElementById('2406.17957v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published as a conference paper at INTERSPEECH 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.04332">arXiv:2402.04332</a> <span> [<a href="https://arxiv.org/pdf/2402.04332">pdf</a>, <a href="https://arxiv.org/format/2402.04332">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Proactive Blockage Prediction for UAV assisted Handover in Future Wireless Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Ahmad%2C+I">Iftikhar Ahmad</a>, <a href="/search/eess?searchtype=author&query=Khan%2C+A+R">Ahsan Raza Khan</a>, <a href="/search/eess?searchtype=author&query=Jabbar%2C+A">Abdul Jabbar</a>, <a href="/search/eess?searchtype=author&query=Alquraan%2C+M">Muhammad Alquraan</a>, <a href="/search/eess?searchtype=author&query=Mohjazi%2C+L">Lina Mohjazi</a>, <a href="/search/eess?searchtype=author&query=Rehman%2C+M+U">Masood Ur Rehman</a>, <a href="/search/eess?searchtype=author&query=Imran%2C+M+A">Muhammad Ali Imran</a>, <a href="/search/eess?searchtype=author&query=Zoha%2C+A">Ahmed Zoha</a>, <a href="/search/eess?searchtype=author&query=Hussain%2C+S">Sajjad Hussain</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.04332v3-abstract-short" style="display: inline;"> The future wireless communication applications demand seamless connectivity, higher throughput, and low latency, for which the millimeter-wave (mmWave) band is considered a potential technology. Nevertheless, line-of-sight (LoS) is often mandatory for mmWave band communication, and it renders these waves sensitive to sudden changes in the environment. Therefore, it is necessary to maintain the LoS… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.04332v3-abstract-full').style.display = 'inline'; document.getElementById('2402.04332v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.04332v3-abstract-full" style="display: none;"> The future wireless communication applications demand seamless connectivity, higher throughput, and low latency, for which the millimeter-wave (mmWave) band is considered a potential technology. Nevertheless, line-of-sight (LoS) is often mandatory for mmWave band communication, and it renders these waves sensitive to sudden changes in the environment. Therefore, it is necessary to maintain the LoS link for a reliable connection. One such technique to maintain LoS is using proactive handover (HO). However, proactive HO is challenging, requiring continuous information about the surrounding wireless network to anticipate potential blockage. This paper presents a proactive blockage prediction mechanism where an unmanned aerial vehicle (UAV) is used as the base station for HO. The proposed scheme uses computer vision (CV) to obtain potential blocking objects, user speed, and location. To assess the effectiveness of the proposed scheme, the system is evaluated using a publicly available dataset for blockage prediction. The study integrates scenarios from Vision-based Wireless (ViWi) and UAV channel modeling, generating wireless data samples relevant to UAVs. The antenna modeling on the UAV end incorporates a polarization-matched scenario to optimize signal reception. The results demonstrate that UAV-assisted Handover not only ensures seamless connectivity but also enhances overall network performance by 20%. This research contributes to the advancement of proactive blockage mitigation strategies in wireless networks, showcasing the potential of UAVs as dynamic and adaptable base stations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.04332v3-abstract-full').style.display = 'none'; document.getElementById('2402.04332v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.11255">arXiv:2311.11255</a> <span> [<a href="https://arxiv.org/pdf/2311.11255">pdf</a>, <a href="https://arxiv.org/format/2311.11255">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> M$^{2}$UGen: Multi-modal Music Understanding and Generation with the Power of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Liu%2C+S">Shansong Liu</a>, <a href="/search/eess?searchtype=author&query=Hussain%2C+A+S">Atin Sakkeer Hussain</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Q">Qilong Wu</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+C">Chenshuo Sun</a>, <a href="/search/eess?searchtype=author&query=Shan%2C+Y">Ying Shan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.11255v5-abstract-short" style="display: inline;"> The current landscape of research leveraging large language models (LLMs) is experiencing a surge. Many works harness the powerful reasoning capabilities of these models to comprehend various modalities, such as text, speech, images, videos, etc. They also utilize LLMs to understand human intention and generate desired outputs like images, videos, and music. However, research that combines both un… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.11255v5-abstract-full').style.display = 'inline'; document.getElementById('2311.11255v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.11255v5-abstract-full" style="display: none;"> The current landscape of research leveraging large language models (LLMs) is experiencing a surge. Many works harness the powerful reasoning capabilities of these models to comprehend various modalities, such as text, speech, images, videos, etc. They also utilize LLMs to understand human intention and generate desired outputs like images, videos, and music. However, research that combines both understanding and generation using LLMs is still limited and in its nascent stage. To address this gap, we introduce a Multi-modal Music Understanding and Generation (M$^{2}$UGen) framework that integrates LLM's abilities to comprehend and generate music for different modalities. The M$^{2}$UGen framework is purpose-built to unlock creative potential from diverse sources of inspiration, encompassing music, image, and video through the use of pretrained MERT, ViT, and ViViT models, respectively. To enable music generation, we explore the use of AudioLDM 2 and MusicGen. Bridging multi-modal understanding and music generation is accomplished through the integration of the LLaMA 2 model. Furthermore, we make use of the MU-LLaMA model to generate extensive datasets that support text/image/video-to-music generation, facilitating the training of our M$^{2}$UGen framework. We conduct a thorough evaluation of our proposed framework. The experimental results demonstrate that our model achieves or surpasses the performance of the current state-of-the-art models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.11255v5-abstract-full').style.display = 'none'; document.getElementById('2311.11255v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.09653">arXiv:2310.09653</a> <span> [<a href="https://arxiv.org/pdf/2310.09653">pdf</a>, <a href="https://arxiv.org/format/2310.09653">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> SelfVC: Voice Conversion With Iterative Refinement using Self Transformations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Neekhara%2C+P">Paarth Neekhara</a>, <a href="/search/eess?searchtype=author&query=Hussain%2C+S">Shehzeen Hussain</a>, <a href="/search/eess?searchtype=author&query=Valle%2C+R">Rafael Valle</a>, <a href="/search/eess?searchtype=author&query=Ginsburg%2C+B">Boris Ginsburg</a>, <a href="/search/eess?searchtype=author&query=Ranjan%2C+R">Rishabh Ranjan</a>, <a href="/search/eess?searchtype=author&query=Dubnov%2C+S">Shlomo Dubnov</a>, <a href="/search/eess?searchtype=author&query=Koushanfar%2C+F">Farinaz Koushanfar</a>, <a href="/search/eess?searchtype=author&query=McAuley%2C+J">Julian McAuley</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.09653v2-abstract-short" style="display: inline;"> We propose SelfVC, a training strategy to iteratively improve a voice conversion model with self-synthesized examples. Previous efforts on voice conversion focus on factorizing speech into explicitly disentangled representations that separately encode speaker characteristics and linguistic content. However, disentangling speech representations to capture such attributes using task-specific loss te… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.09653v2-abstract-full').style.display = 'inline'; document.getElementById('2310.09653v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.09653v2-abstract-full" style="display: none;"> We propose SelfVC, a training strategy to iteratively improve a voice conversion model with self-synthesized examples. Previous efforts on voice conversion focus on factorizing speech into explicitly disentangled representations that separately encode speaker characteristics and linguistic content. However, disentangling speech representations to capture such attributes using task-specific loss terms can lead to information loss. In this work, instead of explicitly disentangling attributes with loss terms, we present a framework to train a controllable voice conversion model on entangled speech representations derived from self-supervised learning (SSL) and speaker verification models. First, we develop techniques to derive prosodic information from the audio signal and SSL representations to train predictive submodules in the synthesis model. Next, we propose a training strategy to iteratively improve the synthesis model for voice conversion, by creating a challenging training objective using self-synthesized examples. We demonstrate that incorporating such self-synthesized examples during training improves the speaker similarity of generated speech as compared to a baseline voice conversion model trained solely on heuristically perturbed inputs. Our framework is trained without any text and achieves state-of-the-art results in zero-shot voice conversion on metrics evaluating naturalness, speaker similarity, and intelligibility of synthesized audio. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.09653v2-abstract-full').style.display = 'none'; document.getElementById('2310.09653v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at ICML 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.11276">arXiv:2308.11276</a> <span> [<a href="https://arxiv.org/pdf/2308.11276">pdf</a>, <a href="https://arxiv.org/format/2308.11276">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Music Understanding LLaMA: Advancing Text-to-Music Generation with Question Answering and Captioning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Liu%2C+S">Shansong Liu</a>, <a href="/search/eess?searchtype=author&query=Hussain%2C+A+S">Atin Sakkeer Hussain</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+C">Chenshuo Sun</a>, <a href="/search/eess?searchtype=author&query=Shan%2C+Y">Ying Shan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.11276v1-abstract-short" style="display: inline;"> Text-to-music generation (T2M-Gen) faces a major obstacle due to the scarcity of large-scale publicly available music datasets with natural language captions. To address this, we propose the Music Understanding LLaMA (MU-LLaMA), capable of answering music-related questions and generating captions for music files. Our model utilizes audio representations from a pretrained MERT model to extract musi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.11276v1-abstract-full').style.display = 'inline'; document.getElementById('2308.11276v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.11276v1-abstract-full" style="display: none;"> Text-to-music generation (T2M-Gen) faces a major obstacle due to the scarcity of large-scale publicly available music datasets with natural language captions. To address this, we propose the Music Understanding LLaMA (MU-LLaMA), capable of answering music-related questions and generating captions for music files. Our model utilizes audio representations from a pretrained MERT model to extract music features. However, obtaining a suitable dataset for training the MU-LLaMA model remains challenging, as existing publicly accessible audio question answering datasets lack the necessary depth for open-ended music question answering. To fill this gap, we present a methodology for generating question-answer pairs from existing audio captioning datasets and introduce the MusicQA Dataset designed for answering open-ended music-related questions. The experiments demonstrate that the proposed MU-LLaMA model, trained on our designed MusicQA dataset, achieves outstanding performance in both music question answering and music caption generation across various metrics, outperforming current state-of-the-art (SOTA) models in both fields and offering a promising advancement in the T2M-Gen research field. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.11276v1-abstract-full').style.display = 'none'; document.getElementById('2308.11276v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.07436">arXiv:2308.07436</a> <span> [<a href="https://arxiv.org/pdf/2308.07436">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> A Hybrid Deep Spatio-Temporal Attention-Based Model for Parkinson's Disease Diagnosis Using Resting State EEG Signals </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Delfan%2C+N">Niloufar Delfan</a>, <a href="/search/eess?searchtype=author&query=Shahsavari%2C+M">Mohammadreza Shahsavari</a>, <a href="/search/eess?searchtype=author&query=Hussain%2C+S">Sadiq Hussain</a>, <a href="/search/eess?searchtype=author&query=Dama%C5%A1evi%C4%8Dius%2C+R">Robertas Dama拧evi膷ius</a>, <a href="/search/eess?searchtype=author&query=Acharya%2C+U+R">U. Rajendra Acharya</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.07436v1-abstract-short" style="display: inline;"> Parkinson's disease (PD), a severe and progressive neurological illness, affects millions of individuals worldwide. For effective treatment and management of PD, an accurate and early diagnosis is crucial. This study presents a deep learning-based model for the diagnosis of PD using resting state electroencephalogram (EEG) signal. The objective of the study is to develop an automated model that ca… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.07436v1-abstract-full').style.display = 'inline'; document.getElementById('2308.07436v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.07436v1-abstract-full" style="display: none;"> Parkinson's disease (PD), a severe and progressive neurological illness, affects millions of individuals worldwide. For effective treatment and management of PD, an accurate and early diagnosis is crucial. This study presents a deep learning-based model for the diagnosis of PD using resting state electroencephalogram (EEG) signal. The objective of the study is to develop an automated model that can extract complex hidden nonlinear features from EEG and demonstrate its generalizability on unseen data. The model is designed using a hybrid model, consists of convolutional neural network (CNN), bidirectional gated recurrent unit (Bi-GRU), and attention mechanism. The proposed method is evaluated on three public datasets (Uc San Diego Dataset, PRED-CT, and University of Iowa (UI) dataset), with one dataset used for training and the other two for evaluation. The results show that the proposed model can accurately diagnose PD with high performance on both the training and hold-out datasets. The model also performs well even when some part of the input information is missing. The results of this work have significant implications for patient treatment and for ongoing investigations into the early detection of Parkinson's disease. The suggested model holds promise as a non-invasive and reliable technique for PD early detection utilizing resting state EEG. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.07436v1-abstract-full').style.display = 'none'; document.getElementById('2308.07436v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.11993">arXiv:2307.11993</a> <span> [<a href="https://arxiv.org/pdf/2307.11993">pdf</a>, <a href="https://arxiv.org/format/2307.11993">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Operating Systems">cs.OS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Verifiable Sustainability in Data Centers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hussain%2C+S+R">Syed Rafiul Hussain</a>, <a href="/search/eess?searchtype=author&query=McDaniel%2C+P">Patrick McDaniel</a>, <a href="/search/eess?searchtype=author&query=Gandhi%2C+A">Anshul Gandhi</a>, <a href="/search/eess?searchtype=author&query=Ghose%2C+K">Kanad Ghose</a>, <a href="/search/eess?searchtype=author&query=Gopalan%2C+K">Kartik Gopalan</a>, <a href="/search/eess?searchtype=author&query=Lee%2C+D">Dongyoon Lee</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Y+D">Yu David Liu</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Z">Zhenhua Liu</a>, <a href="/search/eess?searchtype=author&query=Mu%2C+S">Shuai Mu</a>, <a href="/search/eess?searchtype=author&query=Zadok%2C+E">Erez Zadok</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.11993v3-abstract-short" style="display: inline;"> Data centers have significant energy needs, both embodied and operational, affecting sustainability adversely. The current techniques and tools for collecting, aggregating, and reporting verifiable sustainability data are vulnerable to cyberattacks and misuse, requiring new security and privacy-preserving solutions. This paper outlines security challenges and research directions for addressing the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.11993v3-abstract-full').style.display = 'inline'; document.getElementById('2307.11993v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.11993v3-abstract-full" style="display: none;"> Data centers have significant energy needs, both embodied and operational, affecting sustainability adversely. The current techniques and tools for collecting, aggregating, and reporting verifiable sustainability data are vulnerable to cyberattacks and misuse, requiring new security and privacy-preserving solutions. This paper outlines security challenges and research directions for addressing these pressing requirements. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.11993v3-abstract-full').style.display = 'none'; document.getElementById('2307.11993v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.01441">arXiv:2304.01441</a> <span> [<a href="https://arxiv.org/pdf/2304.01441">pdf</a>, <a href="https://arxiv.org/format/2304.01441">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> NetFlick: Adversarial Flickering Attacks on Deep Learning Based Video Compression </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chang%2C+J">Jung-Woo Chang</a>, <a href="/search/eess?searchtype=author&query=Sheybani%2C+N">Nojan Sheybani</a>, <a href="/search/eess?searchtype=author&query=Hussain%2C+S+S">Shehzeen Samarah Hussain</a>, <a href="/search/eess?searchtype=author&query=Javaheripi%2C+M">Mojan Javaheripi</a>, <a href="/search/eess?searchtype=author&query=Hidano%2C+S">Seira Hidano</a>, <a href="/search/eess?searchtype=author&query=Koushanfar%2C+F">Farinaz Koushanfar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.01441v1-abstract-short" style="display: inline;"> Video compression plays a significant role in IoT devices for the efficient transport of visual data while satisfying all underlying bandwidth constraints. Deep learning-based video compression methods are rapidly replacing traditional algorithms and providing state-of-the-art results on edge devices. However, recently developed adversarial attacks demonstrate that digitally crafted perturbations… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.01441v1-abstract-full').style.display = 'inline'; document.getElementById('2304.01441v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.01441v1-abstract-full" style="display: none;"> Video compression plays a significant role in IoT devices for the efficient transport of visual data while satisfying all underlying bandwidth constraints. Deep learning-based video compression methods are rapidly replacing traditional algorithms and providing state-of-the-art results on edge devices. However, recently developed adversarial attacks demonstrate that digitally crafted perturbations can break the Rate-Distortion relationship of video compression. In this work, we present a real-world LED attack to target video compression frameworks. Our physically realizable attack, dubbed NetFlick, can degrade the spatio-temporal correlation between successive frames by injecting flickering temporal perturbations. In addition, we propose universal perturbations that can downgrade performance of incoming video without prior knowledge of the contents. Experimental results demonstrate that NetFlick can successfully deteriorate the performance of video compression frameworks in both digital- and physical-settings and can be further extended to attack downstream video classification networks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.01441v1-abstract-full').style.display = 'none'; document.getElementById('2304.01441v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages; Accepted to ICLR 2023 ML4IoT workshop</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.08137">arXiv:2302.08137</a> <span> [<a href="https://arxiv.org/pdf/2302.08137">pdf</a>, <a href="https://arxiv.org/format/2302.08137">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> ACE-VC: Adaptive and Controllable Voice Conversion using Explicitly Disentangled Self-supervised Speech Representations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hussain%2C+S">Shehzeen Hussain</a>, <a href="/search/eess?searchtype=author&query=Neekhara%2C+P">Paarth Neekhara</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+J">Jocelyn Huang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+J">Jason Li</a>, <a href="/search/eess?searchtype=author&query=Ginsburg%2C+B">Boris Ginsburg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.08137v1-abstract-short" style="display: inline;"> In this work, we propose a zero-shot voice conversion method using speech representations trained with self-supervised learning. First, we develop a multi-task model to decompose a speech utterance into features such as linguistic content, speaker characteristics, and speaking style. To disentangle content and speaker representations, we propose a training strategy based on Siamese networks that e… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.08137v1-abstract-full').style.display = 'inline'; document.getElementById('2302.08137v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.08137v1-abstract-full" style="display: none;"> In this work, we propose a zero-shot voice conversion method using speech representations trained with self-supervised learning. First, we develop a multi-task model to decompose a speech utterance into features such as linguistic content, speaker characteristics, and speaking style. To disentangle content and speaker representations, we propose a training strategy based on Siamese networks that encourages similarity between the content representations of the original and pitch-shifted audio. Next, we develop a synthesis model with pitch and duration predictors that can effectively reconstruct the speech signal from its decomposed representation. Our framework allows controllable and speaker-adaptive synthesis to perform zero-shot any-to-any voice conversion achieving state-of-the-art results on metrics evaluating speaker similarity, intelligibility, and naturalness. Using just 10 seconds of data for a target speaker, our framework can perform voice swapping and achieves a speaker verification EER of 5.5% for seen speakers and 8.4% for unseen speakers. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.08137v1-abstract-full').style.display = 'none'; document.getElementById('2302.08137v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published as a conference paper at ICASSP 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.13893">arXiv:2212.13893</a> <span> [<a href="https://arxiv.org/pdf/2212.13893">pdf</a>, <a href="https://arxiv.org/format/2212.13893">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Machine learning for accelerating the discovery of high performance low-cost solar cells: a systematic review </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Bhatti%2C+S">Satyam Bhatti</a>, <a href="/search/eess?searchtype=author&query=Manzoor%2C+H+U">Habib Ullah Manzoor</a>, <a href="/search/eess?searchtype=author&query=Michel%2C+B">Bruno Michel</a>, <a href="/search/eess?searchtype=author&query=Bonilla%2C+R+S">Ruy Sebastian Bonilla</a>, <a href="/search/eess?searchtype=author&query=Abrams%2C+R">Richard Abrams</a>, <a href="/search/eess?searchtype=author&query=Zoha%2C+A">Ahmed Zoha</a>, <a href="/search/eess?searchtype=author&query=Hussain%2C+S">Sajjad Hussain</a>, <a href="/search/eess?searchtype=author&query=Ghannam%2C+R">Rami Ghannam</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.13893v1-abstract-short" style="display: inline;"> Solar photovoltaic (PV) technology has merged as an efficient and versatile method for converting the Sun's vast energy into electricity. Innovation in developing new materials and solar cell architectures is required to ensure lightweight, portable, and flexible miniaturized electronic devices operate for long periods with reduced battery demand. Recent advances in biomedical implantable and wear… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.13893v1-abstract-full').style.display = 'inline'; document.getElementById('2212.13893v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.13893v1-abstract-full" style="display: none;"> Solar photovoltaic (PV) technology has merged as an efficient and versatile method for converting the Sun's vast energy into electricity. Innovation in developing new materials and solar cell architectures is required to ensure lightweight, portable, and flexible miniaturized electronic devices operate for long periods with reduced battery demand. Recent advances in biomedical implantable and wearable devices have coincided with a growing interest in efficient energy-harvesting solutions. Such devices primarily rely on rechargeable batteries to satisfy their energy needs. Moreover, Artificial Intelligence (AI) and Machine Learning (ML) techniques are touted as game changers in energy harvesting, especially in solar energy materials. In this article, we systematically review a range of ML techniques for optimizing the performance of low-cost solar cells for miniaturized electronic devices. Our systematic review reveals that these ML techniques can expedite the discovery of new solar cell materials and architectures. In particular, this review covers a broad range of ML techniques targeted at producing low-cost solar cells. Moreover, we present a new method of classifying the literature according to data synthesis, ML algorithms, optimization, and fabrication process. In addition, our review reveals that the Gaussian Process Regression (GPR) ML technique with Bayesian Optimization (BO) enables the design of the most promising low-solar cell architecture. Therefore, our review is a critical evaluation of existing ML techniques and is presented to guide researchers in discovering the next generation of low-cost solar cells using ML techniques. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.13893v1-abstract-full').style.display = 'none'; document.getElementById('2212.13893v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.07967">arXiv:2204.07967</a> <span> [<a href="https://arxiv.org/pdf/2204.07967">pdf</a>, <a href="https://arxiv.org/format/2204.07967">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.3390/drones7030214">10.3390/drones7030214 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> A Survey on Energy Optimization Techniques in UAV-Based Cellular Networks: From Conventional to Machine Learning Approaches </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Abubakar%2C+A+I">Attai Ibrahim Abubakar</a>, <a href="/search/eess?searchtype=author&query=Ahmad%2C+I">Iftikhar Ahmad</a>, <a href="/search/eess?searchtype=author&query=Omeke%2C+K+G">Kenechi G. Omeke</a>, <a href="/search/eess?searchtype=author&query=Ozturk%2C+M">Metin Ozturk</a>, <a href="/search/eess?searchtype=author&query=Ozturk%2C+C">Cihat Ozturk</a>, <a href="/search/eess?searchtype=author&query=Abdel-Salam%2C+A+M">Ali Makine Abdel-Salam</a>, <a href="/search/eess?searchtype=author&query=Mollel%2C+M+S">Michael S. Mollel</a>, <a href="/search/eess?searchtype=author&query=Abbasi%2C+Q+H">Qammer H. Abbasi</a>, <a href="/search/eess?searchtype=author&query=Hussain%2C+S">Sajjad Hussain</a>, <a href="/search/eess?searchtype=author&query=Imran%2C+M+A">Muhammad Ali Imran</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.07967v1-abstract-short" style="display: inline;"> Wireless communication networks have been witnessing an unprecedented demand due to the increasing number of connected devices and emerging bandwidth-hungry applications. Albeit many competent technologies for capacity enhancement purposes, such as millimeter wave communications and network densification, there is still room and need for further capacity enhancement in wireless communication netwo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.07967v1-abstract-full').style.display = 'inline'; document.getElementById('2204.07967v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.07967v1-abstract-full" style="display: none;"> Wireless communication networks have been witnessing an unprecedented demand due to the increasing number of connected devices and emerging bandwidth-hungry applications. Albeit many competent technologies for capacity enhancement purposes, such as millimeter wave communications and network densification, there is still room and need for further capacity enhancement in wireless communication networks, especially for the cases of unusual people gatherings, such as sport competitions, musical concerts, etc. Unmanned aerial vehicles (UAVs) have been identified as one of the promising options to enhance the capacity due to their easy implementation, pop up fashion operation, and cost-effective nature. The main idea is to deploy base stations on UAVs and operate them as flying base stations, thereby bringing additional capacity to where it is needed. However, because the UAVs mostly have limited energy storage, their energy consumption must be optimized to increase flight time. In this survey, we investigate different energy optimization techniques with a top-level classification in terms of the optimization algorithm employed; conventional and machine learning (ML). Such classification helps understand the state of the art and the current trend in terms of methodology. In this regard, various optimization techniques are identified from the related literature, and they are presented under the above mentioned classes of employed optimization methods. In addition, for the purpose of completeness, we include a brief tutorial on the optimization methods and power supply and charging mechanisms of UAVs. Moreover, novel concepts, such as reflective intelligent surfaces and landing spot optimization, are also covered to capture the latest trend in the literature. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.07967v1-abstract-full').style.display = 'none'; document.getElementById('2204.07967v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">41 pages, 5 Figures, 6 Tables. Submitted to Open Journal of Communications Society (OJ-COMS)</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> MDPI Drones 2023 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.01077">arXiv:2110.01077</a> <span> [<a href="https://arxiv.org/pdf/2110.01077">pdf</a>, <a href="https://arxiv.org/format/2110.01077">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Multi-task Voice Activated Framework using Self-supervised Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hussain%2C+S">Shehzeen Hussain</a>, <a href="/search/eess?searchtype=author&query=Nguyen%2C+V">Van Nguyen</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+S">Shuhua Zhang</a>, <a href="/search/eess?searchtype=author&query=Visser%2C+E">Erik Visser</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.01077v3-abstract-short" style="display: inline;"> Self-supervised learning methods such as wav2vec 2.0 have shown promising results in learning speech representations from unlabelled and untranscribed speech data that are useful for speech recognition. Since these representations are learned without any task-specific supervision, they can also be useful for other voice-activated tasks like speaker verification, keyword spotting, emotion classific… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.01077v3-abstract-full').style.display = 'inline'; document.getElementById('2110.01077v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.01077v3-abstract-full" style="display: none;"> Self-supervised learning methods such as wav2vec 2.0 have shown promising results in learning speech representations from unlabelled and untranscribed speech data that are useful for speech recognition. Since these representations are learned without any task-specific supervision, they can also be useful for other voice-activated tasks like speaker verification, keyword spotting, emotion classification etc. In our work, we propose a general purpose framework for adapting a pre-trained wav2vec 2.0 model for different voice-activated tasks. We develop downstream network architectures that operate on the contextualized speech representations of wav2vec 2.0 to adapt the representations for solving a given task. Finally, we extend our framework to perform multi-task learning by jointly optimizing the network parameters on multiple voice activated tasks using a shared transformer backbone. Both of our single and multi-task frameworks achieve state-of-the-art results in speaker verification and keyword spotting benchmarks. Our best performing models achieve 1.98% and 3.15% EER on VoxCeleb1 test set when trained on VoxCeleb2 and VoxCeleb1 respectively, and 98.23% accuracy on Google Speech Commands v1.0 keyword spotting dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.01077v3-abstract-full').style.display = 'none'; document.getElementById('2110.01077v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at ICASSP 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2108.11697">arXiv:2108.11697</a> <span> [<a href="https://arxiv.org/pdf/2108.11697">pdf</a>, <a href="https://arxiv.org/format/2108.11697">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Revenue Maximization through Cell Switching and Spectrum Leasing in 5G HetNets </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Abubakar%2C+A+I">Attai Ibrahim Abubakar</a>, <a href="/search/eess?searchtype=author&query=Ozturk%2C+C">Cihat Ozturk</a>, <a href="/search/eess?searchtype=author&query=Ozturk%2C+M">Metin Ozturk</a>, <a href="/search/eess?searchtype=author&query=Mollel%2C+M+S">Michael S. Mollel</a>, <a href="/search/eess?searchtype=author&query=Asad%2C+S+M">Syed Muhammad Asad</a>, <a href="/search/eess?searchtype=author&query=Hassan%2C+N+U">Naveed Ul Hassan</a>, <a href="/search/eess?searchtype=author&query=Hussain%2C+S">Sajjad Hussain</a>, <a href="/search/eess?searchtype=author&query=Imran%2C+M">MuhammadAli Imran</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2108.11697v1-abstract-short" style="display: inline;"> One of the ways of achieving improved capacity in mobile cellular networks is via network densification. Even though densification increases the capacity of the network, it also leads to increased energy consumption which can be curbed by dynamically switching off some base stations (BSs) during periods of low traffic. However, dynamic cell switching has the challenge of spectrum under-utilization… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.11697v1-abstract-full').style.display = 'inline'; document.getElementById('2108.11697v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2108.11697v1-abstract-full" style="display: none;"> One of the ways of achieving improved capacity in mobile cellular networks is via network densification. Even though densification increases the capacity of the network, it also leads to increased energy consumption which can be curbed by dynamically switching off some base stations (BSs) during periods of low traffic. However, dynamic cell switching has the challenge of spectrum under-utilizationas the spectrum originally occupied by the BSs that are turned off remains dormant. This dormant spectrum can be leased by the primary network (PN) operators, who hold the license, to the secondary network (SN) operators who cannot afford to purchase the spectrum license. Thus enabling the PN to gain additional revenue from spectrum leasing as well as from electricity cost savings due to reduced energy consumption. Therefore, in this work, we propose a cell switching and spectrum leasing framework based on simulated annealing (SA) algorithm to maximize the revenue of the PN while respecting the quality-of-service constraints. The performance evaluation reveals that the proposed method is very close to optimal exhaustive search method with a significant reduction in the computation complexity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.11697v1-abstract-full').style.display = 'none'; document.getElementById('2108.11697v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 August, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">28 pages, 6 figures, Submitted to IEEE Transactions on cognitive communications and networking (TCCN)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2105.08590">arXiv:2105.08590</a> <span> [<a href="https://arxiv.org/pdf/2105.08590">pdf</a>, <a href="https://arxiv.org/format/2105.08590">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.inffus.2022.09.023">10.1016/j.inffus.2022.09.023 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> UncertaintyFuseNet: Robust Uncertainty-aware Hierarchical Feature Fusion Model with Ensemble Monte Carlo Dropout for COVID-19 Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Abdar%2C+M">Moloud Abdar</a>, <a href="/search/eess?searchtype=author&query=Salari%2C+S">Soorena Salari</a>, <a href="/search/eess?searchtype=author&query=Qahremani%2C+S">Sina Qahremani</a>, <a href="/search/eess?searchtype=author&query=Lam%2C+H">Hak-Keung Lam</a>, <a href="/search/eess?searchtype=author&query=Karray%2C+F">Fakhri Karray</a>, <a href="/search/eess?searchtype=author&query=Hussain%2C+S">Sadiq Hussain</a>, <a href="/search/eess?searchtype=author&query=Khosravi%2C+A">Abbas Khosravi</a>, <a href="/search/eess?searchtype=author&query=Acharya%2C+U+R">U. Rajendra Acharya</a>, <a href="/search/eess?searchtype=author&query=Makarenkov%2C+V">Vladimir Makarenkov</a>, <a href="/search/eess?searchtype=author&query=Nahavandi%2C+S">Saeid Nahavandi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2105.08590v3-abstract-short" style="display: inline;"> The COVID-19 (Coronavirus disease 2019) pandemic has become a major global threat to human health and well-being. Thus, the development of computer-aided detection (CAD) systems that are capable to accurately distinguish COVID-19 from other diseases using chest computed tomography (CT) and X-ray data is of immediate priority. Such automatic systems are usually based on traditional machine learning… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.08590v3-abstract-full').style.display = 'inline'; document.getElementById('2105.08590v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2105.08590v3-abstract-full" style="display: none;"> The COVID-19 (Coronavirus disease 2019) pandemic has become a major global threat to human health and well-being. Thus, the development of computer-aided detection (CAD) systems that are capable to accurately distinguish COVID-19 from other diseases using chest computed tomography (CT) and X-ray data is of immediate priority. Such automatic systems are usually based on traditional machine learning or deep learning methods. Differently from most of existing studies, which used either CT scan or X-ray images in COVID-19-case classification, we present a simple but efficient deep learning feature fusion model, called UncertaintyFuseNet, which is able to classify accurately large datasets of both of these types of images. We argue that the uncertainty of the model's predictions should be taken into account in the learning process, even though most of existing studies have overlooked it. We quantify the prediction uncertainty in our feature fusion model using effective Ensemble MC Dropout (EMCD) technique. A comprehensive simulation study has been conducted to compare the results of our new model to the existing approaches, evaluating the performance of competing models in terms of Precision, Recall, F-Measure, Accuracy and ROC curves. The obtained results prove the efficiency of our model which provided the prediction accuracy of 99.08\% and 96.35\% for the considered CT scan and X-ray datasets, respectively. Moreover, our UncertaintyFuseNet model was generally robust to noise and performed well with previously unseen data. The source code of our implementation is freely available at: https://github.com/moloud1987/UncertaintyFuseNet-for-COVID-19-Classification. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.08590v3-abstract-full').style.display = 'none'; document.getElementById('2105.08590v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 January, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 May, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 18 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Information Fusion 2023 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2104.08954">arXiv:2104.08954</a> <span> [<a href="https://arxiv.org/pdf/2104.08954">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1038/s41598-021-93543-8">10.1038/s41598-021-93543-8 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Combining a Convolutional Neural Network with Autoencoders to Predict the Survival Chance of COVID-19 Patients </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Khozeimeh%2C+F">Fahime Khozeimeh</a>, <a href="/search/eess?searchtype=author&query=Sharifrazi%2C+D">Danial Sharifrazi</a>, <a href="/search/eess?searchtype=author&query=Izadi%2C+N+H">Navid Hoseini Izadi</a>, <a href="/search/eess?searchtype=author&query=Joloudari%2C+J+H">Javad Hassannataj Joloudari</a>, <a href="/search/eess?searchtype=author&query=Shoeibi%2C+A">Afshin Shoeibi</a>, <a href="/search/eess?searchtype=author&query=Alizadehsani%2C+R">Roohallah Alizadehsani</a>, <a href="/search/eess?searchtype=author&query=Gorriz%2C+J+M">Juan M. Gorriz</a>, <a href="/search/eess?searchtype=author&query=Hussain%2C+S">Sadiq Hussain</a>, <a href="/search/eess?searchtype=author&query=Sani%2C+Z+A">Zahra Alizadeh Sani</a>, <a href="/search/eess?searchtype=author&query=Moosaei%2C+H">Hossein Moosaei</a>, <a href="/search/eess?searchtype=author&query=Khosravi%2C+A">Abbas Khosravi</a>, <a href="/search/eess?searchtype=author&query=Nahavandi%2C+S">Saeid Nahavandi</a>, <a href="/search/eess?searchtype=author&query=Islam%2C+S+M+S">Sheikh Mohammed Shariful Islam</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2104.08954v2-abstract-short" style="display: inline;"> COVID-19 has caused many deaths worldwide. The automation of the diagnosis of this virus is highly desired. Convolutional neural networks (CNNs) have shown outstanding classification performance on image datasets. To date, it appears that COVID computer-aided diagnosis systems based on CNNs and clinical information have not yet been analysed or explored. We propose a novel method, named the CNN-AE… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.08954v2-abstract-full').style.display = 'inline'; document.getElementById('2104.08954v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2104.08954v2-abstract-full" style="display: none;"> COVID-19 has caused many deaths worldwide. The automation of the diagnosis of this virus is highly desired. Convolutional neural networks (CNNs) have shown outstanding classification performance on image datasets. To date, it appears that COVID computer-aided diagnosis systems based on CNNs and clinical information have not yet been analysed or explored. We propose a novel method, named the CNN-AE, to predict the survival chance of COVID-19 patients using a CNN trained with clinical information. Notably, the required resources to prepare CT images are expensive and limited compared to those required to collect clinical data, such as blood pressure, liver disease, etc. We evaluated our method using a publicly available clinical dataset that we collected. The dataset properties were carefully analysed to extract important features and compute the correlations of features. A data augmentation procedure based on autoencoders (AEs) was proposed to balance the dataset. The experimental results revealed that the average accuracy of the CNN-AE (96.05%) was higher than that of the CNN (92.49%). To demonstrate the generality of our augmentation method, we trained some existing mortality risk prediction methods on our dataset (with and without data augmentation) and compared their performances. We also evaluated our method using another dataset for further generality verification. To show that clinical data can be used for COVID-19 survival chance prediction, the CNN-AE was compared with multiple pre-trained deep models that were tuned based on CT images. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.08954v2-abstract-full').style.display = 'none'; document.getElementById('2104.08954v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 August, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Scientific Reports, 11(1), 1-18 (2021) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.03344">arXiv:2103.03344</a> <span> [<a href="https://arxiv.org/pdf/2103.03344">pdf</a>, <a href="https://arxiv.org/format/2103.03344">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> WaveGuard: Understanding and Mitigating Audio Adversarial Examples </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hussain%2C+S">Shehzeen Hussain</a>, <a href="/search/eess?searchtype=author&query=Neekhara%2C+P">Paarth Neekhara</a>, <a href="/search/eess?searchtype=author&query=Dubnov%2C+S">Shlomo Dubnov</a>, <a href="/search/eess?searchtype=author&query=McAuley%2C+J">Julian McAuley</a>, <a href="/search/eess?searchtype=author&query=Koushanfar%2C+F">Farinaz Koushanfar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.03344v1-abstract-short" style="display: inline;"> There has been a recent surge in adversarial attacks on deep learning based automatic speech recognition (ASR) systems. These attacks pose new challenges to deep learning security and have raised significant concerns in deploying ASR systems in safety-critical applications. In this work, we introduce WaveGuard: a framework for detecting adversarial inputs that are crafted to attack ASR systems. Ou… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.03344v1-abstract-full').style.display = 'inline'; document.getElementById('2103.03344v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.03344v1-abstract-full" style="display: none;"> There has been a recent surge in adversarial attacks on deep learning based automatic speech recognition (ASR) systems. These attacks pose new challenges to deep learning security and have raised significant concerns in deploying ASR systems in safety-critical applications. In this work, we introduce WaveGuard: a framework for detecting adversarial inputs that are crafted to attack ASR systems. Our framework incorporates audio transformation functions and analyses the ASR transcriptions of the original and transformed audio to detect adversarial inputs. We demonstrate that our defense framework is able to reliably detect adversarial examples constructed by four recent audio adversarial attacks, with a variety of audio transformation functions. With careful regard for best practices in defense evaluations, we analyze our proposed defense and its strength to withstand adaptive and robust attacks in the audio domain. We empirically demonstrate that audio transformations that recover audio from perceptually informed representations can lead to a strong defense that is robust against an adaptive adversary even in a complete white-box setting. Furthermore, WaveGuard can be used out-of-the box and integrated directly with any ASR model to efficiently detect audio adversarial examples, without the need for model retraining. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.03344v1-abstract-full').style.display = 'none'; document.getElementById('2103.03344v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published as a conference paper at Usenix Security 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2102.06883">arXiv:2102.06883</a> <span> [<a href="https://arxiv.org/pdf/2102.06883">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Fusion of convolution neural network, support vector machine and Sobel filter for accurate detection of COVID-19 patients using X-ray images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Sharifrazi%2C+D">Danial Sharifrazi</a>, <a href="/search/eess?searchtype=author&query=Alizadehsani%2C+R">Roohallah Alizadehsani</a>, <a href="/search/eess?searchtype=author&query=Roshanzamir%2C+M">Mohamad Roshanzamir</a>, <a href="/search/eess?searchtype=author&query=Joloudari%2C+J+H">Javad Hassannataj Joloudari</a>, <a href="/search/eess?searchtype=author&query=Shoeibi%2C+A">Afshin Shoeibi</a>, <a href="/search/eess?searchtype=author&query=Jafari%2C+M">Mahboobeh Jafari</a>, <a href="/search/eess?searchtype=author&query=Hussain%2C+S">Sadiq Hussain</a>, <a href="/search/eess?searchtype=author&query=Sani%2C+Z+A">Zahra Alizadeh Sani</a>, <a href="/search/eess?searchtype=author&query=Hasanzadeh%2C+F">Fereshteh Hasanzadeh</a>, <a href="/search/eess?searchtype=author&query=Khozeimeh%2C+F">Fahime Khozeimeh</a>, <a href="/search/eess?searchtype=author&query=Khosravi%2C+A">Abbas Khosravi</a>, <a href="/search/eess?searchtype=author&query=Nahavandi%2C+S">Saeid Nahavandi</a>, <a href="/search/eess?searchtype=author&query=Panahiazar%2C+M">Maryam Panahiazar</a>, <a href="/search/eess?searchtype=author&query=Zare%2C+A">Assef Zare</a>, <a href="/search/eess?searchtype=author&query=Islam%2C+S+M+S">Sheikh Mohammed Shariful Islam</a>, <a href="/search/eess?searchtype=author&query=Acharya%2C+U+R">U Rajendra Acharya</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2102.06883v1-abstract-short" style="display: inline;"> The coronavirus (COVID-19) is currently the most common contagious disease which is prevalent all over the world. The main challenge of this disease is the primary diagnosis to prevent secondary infections and its spread from one person to another. Therefore, it is essential to use an automatic diagnosis system along with clinical procedures for the rapid diagnosis of COVID-19 to prevent its sprea… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.06883v1-abstract-full').style.display = 'inline'; document.getElementById('2102.06883v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2102.06883v1-abstract-full" style="display: none;"> The coronavirus (COVID-19) is currently the most common contagious disease which is prevalent all over the world. The main challenge of this disease is the primary diagnosis to prevent secondary infections and its spread from one person to another. Therefore, it is essential to use an automatic diagnosis system along with clinical procedures for the rapid diagnosis of COVID-19 to prevent its spread. Artificial intelligence techniques using computed tomography (CT) images of the lungs and chest radiography have the potential to obtain high diagnostic performance for Covid-19 diagnosis. In this study, a fusion of convolutional neural network (CNN), support vector machine (SVM), and Sobel filter is proposed to detect COVID-19 using X-ray images. A new X-ray image dataset was collected and subjected to high pass filter using a Sobel filter to obtain the edges of the images. Then these images are fed to CNN deep learning model followed by SVM classifier with ten-fold cross validation strategy. This method is designed so that it can learn with not many data. Our results show that the proposed CNN-SVM with Sobel filtering (CNN-SVM+Sobel) achieved the highest classification accuracy of 99.02% in accurate detection of COVID-19. It showed that using Sobel filter can improve the performance of CNN. Unlike most of the other researches, this method does not use a pre-trained network. We have also validated our developed model using six public databases and obtained the highest performance. Hence, our developed model is ready for clinical application <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.06883v1-abstract-full').style.display = 'none'; document.getElementById('2102.06883v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2102.06388">arXiv:2102.06388</a> <span> [<a href="https://arxiv.org/pdf/2102.06388">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3462635">10.1145/3462635 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Uncertainty-Aware Semi-Supervised Method Using Large Unlabeled and Limited Labeled COVID-19 Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Alizadehsani%2C+R">Roohallah Alizadehsani</a>, <a href="/search/eess?searchtype=author&query=Sharifrazi%2C+D">Danial Sharifrazi</a>, <a href="/search/eess?searchtype=author&query=Izadi%2C+N+H">Navid Hoseini Izadi</a>, <a href="/search/eess?searchtype=author&query=Joloudari%2C+J+H">Javad Hassannataj Joloudari</a>, <a href="/search/eess?searchtype=author&query=Shoeibi%2C+A">Afshin Shoeibi</a>, <a href="/search/eess?searchtype=author&query=Gorriz%2C+J+M">Juan M. Gorriz</a>, <a href="/search/eess?searchtype=author&query=Hussain%2C+S">Sadiq Hussain</a>, <a href="/search/eess?searchtype=author&query=Arco%2C+J+E">Juan E. Arco</a>, <a href="/search/eess?searchtype=author&query=Sani%2C+Z+A">Zahra Alizadeh Sani</a>, <a href="/search/eess?searchtype=author&query=Khozeimeh%2C+F">Fahime Khozeimeh</a>, <a href="/search/eess?searchtype=author&query=Khosravi%2C+A">Abbas Khosravi</a>, <a href="/search/eess?searchtype=author&query=Nahavandi%2C+S">Saeid Nahavandi</a>, <a href="/search/eess?searchtype=author&query=Islam%2C+S+M+S">Sheikh Mohammed Shariful Islam</a>, <a href="/search/eess?searchtype=author&query=Acharya%2C+U+R">U Rajendra Acharya</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2102.06388v2-abstract-short" style="display: inline;"> The new coronavirus has caused more than one million deaths and continues to spread rapidly. This virus targets the lungs, causing respiratory distress which can be mild or severe. The X-ray or computed tomography (CT) images of lungs can reveal whether the patient is infected with COVID-19 or not. Many researchers are trying to improve COVID-19 detection using artificial intelligence. Our motivat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.06388v2-abstract-full').style.display = 'inline'; document.getElementById('2102.06388v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2102.06388v2-abstract-full" style="display: none;"> The new coronavirus has caused more than one million deaths and continues to spread rapidly. This virus targets the lungs, causing respiratory distress which can be mild or severe. The X-ray or computed tomography (CT) images of lungs can reveal whether the patient is infected with COVID-19 or not. Many researchers are trying to improve COVID-19 detection using artificial intelligence. Our motivation is to develop an automatic method that can cope with scenarios in which preparing labeled data is time consuming or expensive. In this article, we propose a Semi-supervised Classification using Limited Labeled Data (SCLLD) relying on Sobel edge detection and Generative Adversarial Networks (GANs) to automate the COVID-19 diagnosis. The GAN discriminator output is a probabilistic value which is used for classification in this work. The proposed system is trained using 10,000 CT scans collected from Omid Hospital, whereas a public dataset is also used for validating our system. The proposed method is compared with other state-of-the-art supervised methods such as Gaussian processes. To the best of our knowledge, this is the first time a semi-supervised method for COVID-19 detection is presented. Our system is capable of learning from a mixture of limited labeled and unlabeled data where supervised learners fail due to a lack of sufficient amount of labeled data. Thus, our semi-supervised training method significantly outperforms the supervised training of Convolutional Neural Network (CNN) when labeled training data is scarce. The 95% confidence intervals for our method in terms of accuracy, sensitivity, and specificity are 99.56 +- 0.20%, 99.88 +- 0.24%, and 99.40 +- 0.18%, respectively, whereas intervals for the CNN (trained supervised) are 68.34 +- 4.11%, 91.2 +- 6.15%, and 46.40 +- 5.21%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.06388v2-abstract-full').style.display = 'none'; document.getElementById('2102.06388v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 February, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> ACM Transactions on Multimedia Computing, Communications, and ApplicationsVolume 17Issue 3sOctober 2021 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2102.00151">arXiv:2102.00151</a> <span> [<a href="https://arxiv.org/pdf/2102.00151">pdf</a>, <a href="https://arxiv.org/format/2102.00151">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Expressive Neural Voice Cloning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Neekhara%2C+P">Paarth Neekhara</a>, <a href="/search/eess?searchtype=author&query=Hussain%2C+S">Shehzeen Hussain</a>, <a href="/search/eess?searchtype=author&query=Dubnov%2C+S">Shlomo Dubnov</a>, <a href="/search/eess?searchtype=author&query=Koushanfar%2C+F">Farinaz Koushanfar</a>, <a href="/search/eess?searchtype=author&query=McAuley%2C+J">Julian McAuley</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2102.00151v1-abstract-short" style="display: inline;"> Voice cloning is the task of learning to synthesize the voice of an unseen speaker from a few samples. While current voice cloning methods achieve promising results in Text-to-Speech (TTS) synthesis for a new voice, these approaches lack the ability to control the expressiveness of synthesized audio. In this work, we propose a controllable voice cloning method that allows fine-grained control over… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.00151v1-abstract-full').style.display = 'inline'; document.getElementById('2102.00151v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2102.00151v1-abstract-full" style="display: none;"> Voice cloning is the task of learning to synthesize the voice of an unseen speaker from a few samples. While current voice cloning methods achieve promising results in Text-to-Speech (TTS) synthesis for a new voice, these approaches lack the ability to control the expressiveness of synthesized audio. In this work, we propose a controllable voice cloning method that allows fine-grained control over various style aspects of the synthesized speech for an unseen speaker. We achieve this by explicitly conditioning the speech synthesis model on a speaker encoding, pitch contour and latent style tokens during training. Through both quantitative and qualitative evaluations, we show that our framework can be used for various expressive voice cloning tasks using only a few transcribed or untranscribed speech samples for a new speaker. These cloning tasks include style transfer from a reference speech, synthesizing speech directly from text, and fine-grained style control by manipulating the style conditioning variables during inference. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.00151v1-abstract-full').style.display = 'none'; document.getElementById('2102.00151v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 January, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 2 figures, 2 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2009.13402">arXiv:2009.13402</a> <span> [<a href="https://arxiv.org/pdf/2009.13402">pdf</a>, <a href="https://arxiv.org/format/2009.13402">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> EEG based Major Depressive disorder and Bipolar disorder detection using Neural Networks: A review </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Yasin%2C+S">Sana Yasin</a>, <a href="/search/eess?searchtype=author&query=Hussain%2C+S+A">Syed Asad Hussain</a>, <a href="/search/eess?searchtype=author&query=Aslan%2C+S">Sinem Aslan</a>, <a href="/search/eess?searchtype=author&query=Raza%2C+I">Imran Raza</a>, <a href="/search/eess?searchtype=author&query=Muzammel%2C+M">Muhammad Muzammel</a>, <a href="/search/eess?searchtype=author&query=Othmani%2C+A">Alice Othmani</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2009.13402v2-abstract-short" style="display: inline;"> Mental disorders represent critical public health challenges as they are leading contributors to the global burden of disease and intensely influence social and financial welfare of individuals. The present comprehensive review concentrate on the two mental disorders: Major depressive Disorder (MDD) and Bipolar Disorder (BD) with noteworthy publications during the last ten years. There is a big ne… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.13402v2-abstract-full').style.display = 'inline'; document.getElementById('2009.13402v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2009.13402v2-abstract-full" style="display: none;"> Mental disorders represent critical public health challenges as they are leading contributors to the global burden of disease and intensely influence social and financial welfare of individuals. The present comprehensive review concentrate on the two mental disorders: Major depressive Disorder (MDD) and Bipolar Disorder (BD) with noteworthy publications during the last ten years. There is a big need nowadays for phenotypic characterization of psychiatric disorders with biomarkers. Electroencephalography (EEG) signals could offer a rich signature for MDD and BD and then they could improve understanding of pathophysiological mechanisms underling these mental disorders. In this review, we focus on the literature works adopting neural networks fed by EEG signals. Among those studies using EEG and neural networks, we have discussed a variety of EEG based protocols, biomarkers and public datasets for depression and bipolar disorder detection. We conclude with a discussion and valuable recommendations that will help to improve the reliability of developed models and for more accurate and more deterministic computational intelligence based systems in psychiatry. This review will prove to be a structured and valuable initial point for the researchers working on depression and bipolar disorders recognition by using EEG signals. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.13402v2-abstract-full').style.display = 'none'; document.getElementById('2009.13402v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 September, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">29 pages,2 figures and 18 Tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2007.10785">arXiv:2007.10785</a> <span> [<a href="https://arxiv.org/pdf/2007.10785">pdf</a>, <a href="https://arxiv.org/format/2007.10785">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.neucom.2024.127317">10.1016/j.neucom.2024.127317 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Automated Detection and Forecasting of COVID-19 using Deep Learning Techniques: A Review </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Shoeibi%2C+A">Afshin Shoeibi</a>, <a href="/search/eess?searchtype=author&query=Khodatars%2C+M">Marjane Khodatars</a>, <a href="/search/eess?searchtype=author&query=Jafari%2C+M">Mahboobeh Jafari</a>, <a href="/search/eess?searchtype=author&query=Ghassemi%2C+N">Navid Ghassemi</a>, <a href="/search/eess?searchtype=author&query=Sadeghi%2C+D">Delaram Sadeghi</a>, <a href="/search/eess?searchtype=author&query=Moridian%2C+P">Parisa Moridian</a>, <a href="/search/eess?searchtype=author&query=Khadem%2C+A">Ali Khadem</a>, <a href="/search/eess?searchtype=author&query=Alizadehsani%2C+R">Roohallah Alizadehsani</a>, <a href="/search/eess?searchtype=author&query=Hussain%2C+S">Sadiq Hussain</a>, <a href="/search/eess?searchtype=author&query=Zare%2C+A">Assef Zare</a>, <a href="/search/eess?searchtype=author&query=Sani%2C+Z+A">Zahra Alizadeh Sani</a>, <a href="/search/eess?searchtype=author&query=Khozeimeh%2C+F">Fahime Khozeimeh</a>, <a href="/search/eess?searchtype=author&query=Nahavandi%2C+S">Saeid Nahavandi</a>, <a href="/search/eess?searchtype=author&query=Acharya%2C+U+R">U. Rajendra Acharya</a>, <a href="/search/eess?searchtype=author&query=Gorriz%2C+J+M">Juan M. Gorriz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2007.10785v7-abstract-short" style="display: inline;"> Coronavirus, or COVID-19, is a hazardous disease that has endangered the health of many people around the world by directly affecting the lungs. COVID-19 is a medium-sized, coated virus with a single-stranded RNA, and also has one of the largest RNA genomes and is approximately 120 nm. The X-Ray and computed tomography (CT) imaging modalities are widely used to obtain a fast and accurate medical d… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.10785v7-abstract-full').style.display = 'inline'; document.getElementById('2007.10785v7-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2007.10785v7-abstract-full" style="display: none;"> Coronavirus, or COVID-19, is a hazardous disease that has endangered the health of many people around the world by directly affecting the lungs. COVID-19 is a medium-sized, coated virus with a single-stranded RNA, and also has one of the largest RNA genomes and is approximately 120 nm. The X-Ray and computed tomography (CT) imaging modalities are widely used to obtain a fast and accurate medical diagnosis. Identifying COVID-19 from these medical images is extremely challenging as it is time-consuming and prone to human errors. Hence, artificial intelligence (AI) methodologies can be used to obtain consistent high performance. Among the AI methods, deep learning (DL) networks have gained popularity recently compared to conventional machine learning (ML). Unlike ML, all stages of feature extraction, feature selection, and classification are accomplished automatically in DL models. In this paper, a complete survey of studies on the application of DL techniques for COVID-19 diagnostic and segmentation of lungs is discussed, concentrating on works that used X-Ray and CT images. Additionally, a review of papers on the forecasting of coronavirus prevalence in different parts of the world with DL is presented. Lastly, the challenges faced in the detection of COVID-19 using DL techniques and directions for future research are discussed. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.10785v7-abstract-full').style.display = 'none'; document.getElementById('2007.10785v7-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 July, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2007.04133">arXiv:2007.04133</a> <span> [<a href="https://arxiv.org/pdf/2007.04133">pdf</a>, <a href="https://arxiv.org/format/2007.04133">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Energy Optimization in Ultra-Dense Radio Access Networks via Traffic-Aware Cell Switching </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Ozturk%2C+M">Metin Ozturk</a>, <a href="/search/eess?searchtype=author&query=Abubakar%2C+A+I">Attai Ibrahim Abubakar</a>, <a href="/search/eess?searchtype=author&query=Nadas%2C+J+P+B">Jo茫o Pedro Battistella Nadas</a>, <a href="/search/eess?searchtype=author&query=Rais%2C+R+N+B">Rao Naveed Bin Rais</a>, <a href="/search/eess?searchtype=author&query=Hussain%2C+S">Sajjad Hussain</a>, <a href="/search/eess?searchtype=author&query=Imran%2C+M+A">Muhammad Ali Imran</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2007.04133v1-abstract-short" style="display: inline;"> Ultra-dense deployments in 5G, the next generation of cellular networks, are an alternative to provide ultra-high throughput by bringing the users closer to the base stations. On the other hand, 5G deployments must not incur a large increase in energy consumption in order to keep them cost-effective and most importantly to reduce the carbon footprint of cellular networks. We propose a reinforcemen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.04133v1-abstract-full').style.display = 'inline'; document.getElementById('2007.04133v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2007.04133v1-abstract-full" style="display: none;"> Ultra-dense deployments in 5G, the next generation of cellular networks, are an alternative to provide ultra-high throughput by bringing the users closer to the base stations. On the other hand, 5G deployments must not incur a large increase in energy consumption in order to keep them cost-effective and most importantly to reduce the carbon footprint of cellular networks. We propose a reinforcement learning cell switching algorithm, to minimize the energy consumption in ultra-dense deployments without compromising the quality of service (QoS) experienced by the users. In this regard, the proposed algorithm can intelligently learn which small cells (SCs) to turn off at any given time based on the traffic load of the SCs and the macro cell. To validate the idea, we used the open call detail record (CDR) data set from the city of Milan, Italy, and tested our algorithm against typical operational benchmark solutions. With the obtained results, we demonstrate exactly when and how the proposed algorithm can provide energy savings, and moreover how this happens without reducing QoS of users. Most importantly, we show that our solution has a very similar performance to the exhaustive search, with the advantage of being scalable and less complex. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.04133v1-abstract-full').style.display = 'none'; document.getElementById('2007.04133v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 July, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">30 pages, 7 Figures, Submitted to IEEE Transactions on Green Communications and Networking (IEEETGCN)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2007.01285">arXiv:2007.01285</a> <span> [<a href="https://arxiv.org/pdf/2007.01285">pdf</a>, <a href="https://arxiv.org/format/2007.01285">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.compbiomed.2021.104949">10.1016/j.compbiomed.2021.104949 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Deep Learning for Neuroimaging-based Diagnosis and Rehabilitation of Autism Spectrum Disorder: A Review </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Khodatars%2C+M">Marjane Khodatars</a>, <a href="/search/eess?searchtype=author&query=Shoeibi%2C+A">Afshin Shoeibi</a>, <a href="/search/eess?searchtype=author&query=Sadeghi%2C+D">Delaram Sadeghi</a>, <a href="/search/eess?searchtype=author&query=Ghassemi%2C+N">Navid Ghassemi</a>, <a href="/search/eess?searchtype=author&query=Jafari%2C+M">Mahboobeh Jafari</a>, <a href="/search/eess?searchtype=author&query=Moridian%2C+P">Parisa Moridian</a>, <a href="/search/eess?searchtype=author&query=Khadem%2C+A">Ali Khadem</a>, <a href="/search/eess?searchtype=author&query=Alizadehsani%2C+R">Roohallah Alizadehsani</a>, <a href="/search/eess?searchtype=author&query=Zare%2C+A">Assef Zare</a>, <a href="/search/eess?searchtype=author&query=Kong%2C+Y">Yinan Kong</a>, <a href="/search/eess?searchtype=author&query=Khosravi%2C+A">Abbas Khosravi</a>, <a href="/search/eess?searchtype=author&query=Nahavandi%2C+S">Saeid Nahavandi</a>, <a href="/search/eess?searchtype=author&query=Hussain%2C+S">Sadiq Hussain</a>, <a href="/search/eess?searchtype=author&query=Acharya%2C+U+R">U. Rajendra Acharya</a>, <a href="/search/eess?searchtype=author&query=Berk%2C+M">Michael Berk</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2007.01285v4-abstract-short" style="display: inline;"> Accurate diagnosis of Autism Spectrum Disorder (ASD) followed by effective rehabilitation is essential for the management of this disorder. Artificial intelligence (AI) techniques can aid physicians to apply automatic diagnosis and rehabilitation procedures. AI techniques comprise traditional machine learning (ML) approaches and deep learning (DL) techniques. Conventional ML methods employ various… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.01285v4-abstract-full').style.display = 'inline'; document.getElementById('2007.01285v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2007.01285v4-abstract-full" style="display: none;"> Accurate diagnosis of Autism Spectrum Disorder (ASD) followed by effective rehabilitation is essential for the management of this disorder. Artificial intelligence (AI) techniques can aid physicians to apply automatic diagnosis and rehabilitation procedures. AI techniques comprise traditional machine learning (ML) approaches and deep learning (DL) techniques. Conventional ML methods employ various feature extraction and classification techniques, but in DL, the process of feature extraction and classification is accomplished intelligently and integrally. DL methods for diagnosis of ASD have been focused on neuroimaging-based approaches. Neuroimaging techniques are non-invasive disease markers potentially useful for ASD diagnosis. Structural and functional neuroimaging techniques provide physicians substantial information about the structure (anatomy and structural connectivity) and function (activity and functional connectivity) of the brain. Due to the intricate structure and function of the brain, proposing optimum procedures for ASD diagnosis with neuroimaging data without exploiting powerful AI techniques like DL may be challenging. In this paper, studies conducted with the aid of DL networks to distinguish ASD are investigated. Rehabilitation tools provided for supporting ASD patients utilizing DL networks are also assessed. Finally, we will present important challenges in the automated detection and rehabilitation of ASD and propose some future works. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.01285v4-abstract-full').style.display = 'none'; document.getElementById('2007.01285v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 July, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Computers in Biology and Medicine, Volume 139, 2021, 104949 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2007.01276">arXiv:2007.01276</a> <span> [<a href="https://arxiv.org/pdf/2007.01276">pdf</a>, <a href="https://arxiv.org/format/2007.01276">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.3390/ijerph18115780">10.3390/ijerph18115780 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Epileptic Seizures Detection Using Deep Learning Techniques: A Review </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Shoeibi%2C+A">Afshin Shoeibi</a>, <a href="/search/eess?searchtype=author&query=Khodatars%2C+M">Marjane Khodatars</a>, <a href="/search/eess?searchtype=author&query=Ghassemi%2C+N">Navid Ghassemi</a>, <a href="/search/eess?searchtype=author&query=Jafari%2C+M">Mahboobeh Jafari</a>, <a href="/search/eess?searchtype=author&query=Moridian%2C+P">Parisa Moridian</a>, <a href="/search/eess?searchtype=author&query=Alizadehsani%2C+R">Roohallah Alizadehsani</a>, <a href="/search/eess?searchtype=author&query=Panahiazar%2C+M">Maryam Panahiazar</a>, <a href="/search/eess?searchtype=author&query=Khozeimeh%2C+F">Fahime Khozeimeh</a>, <a href="/search/eess?searchtype=author&query=Zare%2C+A">Assef Zare</a>, <a href="/search/eess?searchtype=author&query=Hosseini-Nejad%2C+H">Hossein Hosseini-Nejad</a>, <a href="/search/eess?searchtype=author&query=Khosravi%2C+A">Abbas Khosravi</a>, <a href="/search/eess?searchtype=author&query=Atiya%2C+A+F">Amir F. Atiya</a>, <a href="/search/eess?searchtype=author&query=Aminshahidi%2C+D">Diba Aminshahidi</a>, <a href="/search/eess?searchtype=author&query=Hussain%2C+S">Sadiq Hussain</a>, <a href="/search/eess?searchtype=author&query=Rouhani%2C+M">Modjtaba Rouhani</a>, <a href="/search/eess?searchtype=author&query=Nahavandi%2C+S">Saeid Nahavandi</a>, <a href="/search/eess?searchtype=author&query=Acharya%2C+U+R">Udyavara Rajendra Acharya</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2007.01276v3-abstract-short" style="display: inline;"> A variety of screening approaches have been proposed to diagnose epileptic seizures, using electroencephalography (EEG) and magnetic resonance imaging (MRI) modalities. Artificial intelligence encompasses a variety of areas, and one of its branches is deep learning (DL). Before the rise of DL, conventional machine learning algorithms involving feature extraction were performed. This limited their… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.01276v3-abstract-full').style.display = 'inline'; document.getElementById('2007.01276v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2007.01276v3-abstract-full" style="display: none;"> A variety of screening approaches have been proposed to diagnose epileptic seizures, using electroencephalography (EEG) and magnetic resonance imaging (MRI) modalities. Artificial intelligence encompasses a variety of areas, and one of its branches is deep learning (DL). Before the rise of DL, conventional machine learning algorithms involving feature extraction were performed. This limited their performance to the ability of those handcrafting the features. However, in DL, the extraction of features and classification are entirely automated. The advent of these techniques in many areas of medicine, such as in the diagnosis of epileptic seizures, has made significant advances. In this study, a comprehensive overview of works focused on automated epileptic seizure detection using DL techniques and neuroimaging modalities is presented. Various methods proposed to diagnose epileptic seizures automatically using EEG and MRI modalities are described. In addition, rehabilitation systems developed for epileptic seizures using DL have been analyzed, and a summary is provided. The rehabilitation tools include cloud computing techniques and hardware required for implementation of DL algorithms. The important challenges in accurate detection of automated epileptic seizures using DL with EEG and MRI modalities are discussed. The advantages and limitations in employing DL-based techniques for epileptic seizures diagnosis are presented. Finally, the most promising DL models proposed and possible future works on automated epileptic seizure detection are delineated. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.01276v3-abstract-full').style.display = 'none'; document.getElementById('2007.01276v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 May, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 July, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> International Journal of Environmental Research and Public Health. 2021; 18(11):5780 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2005.00407">arXiv:2005.00407</a> <span> [<a href="https://arxiv.org/pdf/2005.00407">pdf</a>, <a href="https://arxiv.org/format/2005.00407">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Context-Aware Wireless Connectivity and Processing Unit Optimization for IoT Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Ozturk%2C+M">Metin Ozturk</a>, <a href="/search/eess?searchtype=author&query=Abubakar%2C+A+I">Attai Ibrahim Abubakar</a>, <a href="/search/eess?searchtype=author&query=Rais%2C+R+N+B">Rao Naveed Bin Rais</a>, <a href="/search/eess?searchtype=author&query=Jaber%2C+M">Mona Jaber</a>, <a href="/search/eess?searchtype=author&query=Hussain%2C+S">Sajjad Hussain</a>, <a href="/search/eess?searchtype=author&query=Imran%2C+M+A">Muhammad Ali Imran</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2005.00407v1-abstract-short" style="display: inline;"> A novel approach is presented in this work for context-aware connectivity and processing optimization of Internet of things (IoT) networks. Different from the state-of-the-art approaches, the proposed approach simultaneously selects the best connectivity and processing unit (e.g., device, fog, and cloud) along with the percentage of data to be offloaded by jointly optimizing energy consumption, re… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.00407v1-abstract-full').style.display = 'inline'; document.getElementById('2005.00407v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2005.00407v1-abstract-full" style="display: none;"> A novel approach is presented in this work for context-aware connectivity and processing optimization of Internet of things (IoT) networks. Different from the state-of-the-art approaches, the proposed approach simultaneously selects the best connectivity and processing unit (e.g., device, fog, and cloud) along with the percentage of data to be offloaded by jointly optimizing energy consumption, response-time, security, and monetary cost. The proposed scheme employs a reinforcement learning algorithm, and manages to achieve significant gains compared to deterministic solutions. In particular, the requirements of IoT devices in terms of response-time and security are taken as inputs along with the remaining battery level of the devices, and the developed algorithm returns an optimized policy. The results obtained show that only our method is able to meet the holistic multi-objective optimisation criteria, albeit, the benchmark approaches may achieve better results on a particular metric at the cost of failing to reach the other targets. Thus, the proposed approach is a device-centric and context-aware solution that accounts for the monetary and battery constraints. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.00407v1-abstract-full').style.display = 'none'; document.getElementById('2005.00407v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 April, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2004.04864">arXiv:2004.04864</a> <span> [<a href="https://arxiv.org/pdf/2004.04864">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Vehicle Intrusion And Theft Control System Using GSM and GPS -- An advance and viable approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Mustafa%2C+A">Ashad Mustafa</a>, <a href="/search/eess?searchtype=author&query=Jameel%2C+H">Hassan Jameel</a>, <a href="/search/eess?searchtype=author&query=Baqar%2C+M">Mohtashim Baqar</a>, <a href="/search/eess?searchtype=author&query=Khan%2C+R+A">Rameez Ahmed Khan</a>, <a href="/search/eess?searchtype=author&query=Yaqoob%2C+Z+M">Zeeshan M Yaqoob</a>, <a href="/search/eess?searchtype=author&query=Rahim%2C+Z">Zeeshan Rahim</a>, <a href="/search/eess?searchtype=author&query=Hussain%2C+S+S">Syed Safdar Hussain</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2004.04864v1-abstract-short" style="display: inline;"> This paper presents a novel approach towards the designing and development of a feasible and an embedded vehicle intrusion and theft control system using GSM (Global System for Mobile Communication) and GPS (Global Positioning System). The proposed system uses GSM technology as one of the distinguishing building blocks of the system. A GPS module Holux GR89 is used to trace the position of the veh… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.04864v1-abstract-full').style.display = 'inline'; document.getElementById('2004.04864v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2004.04864v1-abstract-full" style="display: none;"> This paper presents a novel approach towards the designing and development of a feasible and an embedded vehicle intrusion and theft control system using GSM (Global System for Mobile Communication) and GPS (Global Positioning System). The proposed system uses GSM technology as one of the distinguishing building blocks of the system. A GPS module Holux GR89 is used to trace the position of the vehicle and Mercury Switches are used to collect analog data continuously, in case of an intrusion, variations will be observed in sensors reading. Continuous readings from sensors are collected on to the microcontroller constantly and on the basis of those readings decision is taken whether an intrusion is made or not and in case of an intrusion a message from a predefined set of messages is generated to the owner of the vehicle and on reception of the message, the owner will have the luxury to take an action via an SMS either to lock the gears of the vehicle or seize the engine of the vehicle from a far-off place. A relay is used to control gears and engine of the vehicle while working with the microcontroller. A prototype system was built and tested. The results were very positive and encouraging. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.04864v1-abstract-full').style.display = 'none'; document.getElementById('2004.04864v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 April, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Journal Article</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Asian journal of engineering, sciences & technology, 2(2) (2012) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2002.04971">arXiv:2002.04971</a> <span> [<a href="https://arxiv.org/pdf/2002.04971">pdf</a>, <a href="https://arxiv.org/format/2002.04971">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/ICCAD45719.2019.8942122">10.1109/ICCAD45719.2019.8942122 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> FastWave: Accelerating Autoregressive Convolutional Neural Networks on FPGA </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hussain%2C+S">Shehzeen Hussain</a>, <a href="/search/eess?searchtype=author&query=Javaheripi%2C+M">Mojan Javaheripi</a>, <a href="/search/eess?searchtype=author&query=Neekhara%2C+P">Paarth Neekhara</a>, <a href="/search/eess?searchtype=author&query=Kastner%2C+R">Ryan Kastner</a>, <a href="/search/eess?searchtype=author&query=Koushanfar%2C+F">Farinaz Koushanfar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2002.04971v1-abstract-short" style="display: inline;"> Autoregressive convolutional neural networks (CNNs) have been widely exploited for sequence generation tasks such as audio synthesis, language modeling and neural machine translation. WaveNet is a deep autoregressive CNN composed of several stacked layers of dilated convolution that is used for sequence generation. While WaveNet produces state-of-the art audio generation results, the naive inferen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.04971v1-abstract-full').style.display = 'inline'; document.getElementById('2002.04971v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2002.04971v1-abstract-full" style="display: none;"> Autoregressive convolutional neural networks (CNNs) have been widely exploited for sequence generation tasks such as audio synthesis, language modeling and neural machine translation. WaveNet is a deep autoregressive CNN composed of several stacked layers of dilated convolution that is used for sequence generation. While WaveNet produces state-of-the art audio generation results, the naive inference implementation is quite slow; it takes a few minutes to generate just one second of audio on a high-end GPU. In this work, we develop the first accelerator platform~\textit{FastWave} for autoregressive convolutional neural networks, and address the associated design challenges. We design the Fast-Wavenet inference model in Vivado HLS and perform a wide range of optimizations including fixed-point implementation, array partitioning and pipelining. Our model uses a fully parameterized parallel architecture for fast matrix-vector multiplication that enables per-layer customized latency fine-tuning for further throughput improvement. Our experiments comparatively assess the trade-off between throughput and resource utilization for various optimizations. Our best WaveNet design on the Xilinx XCVU13P FPGA that uses only on-chip memory, achieves 66 faster generation speed compared to CPU implementation and 11 faster generation speed than GPU implementation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.04971v1-abstract-full').style.display = 'none'; document.getElementById('2002.04971v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published as a conference paper at ICCAD 2019</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> @inproceedings {1143,booktitle = {IEEE/ACM 2019 International Conference On Computer Aided Design (ICCAD)},year = {2019},month = {November}} </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1908.02510">arXiv:1908.02510</a> <span> [<a href="https://arxiv.org/pdf/1908.02510">pdf</a>, <a href="https://arxiv.org/format/1908.02510">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Quantum Calculus-based Volterra LMS for Nonlinear Channel Estimation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Usman%2C+M">Muhammad Usman</a>, <a href="/search/eess?searchtype=author&query=Ibrahim%2C+M+S">Muhammad Sohail Ibrahim</a>, <a href="/search/eess?searchtype=author&query=Ahmad%2C+J">Jawwad Ahmad</a>, <a href="/search/eess?searchtype=author&query=Hussain%2C+S+S">Syed Saiq Hussain</a>, <a href="/search/eess?searchtype=author&query=Moinuddin%2C+M">Muhammad Moinuddin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1908.02510v1-abstract-short" style="display: inline;"> A novel adaptive filtering method called $q$-Volterra least mean square ($q$-VLMS) is presented in this paper. The $q$-VLMS is a nonlinear extension of conventional LMS and it is based on Jackson's derivative also known as $q$-calculus. In Volterra LMS, due to large variance of input signal the convergence speed is very low. With proper manipulation we successfully improved the convergence perform… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1908.02510v1-abstract-full').style.display = 'inline'; document.getElementById('1908.02510v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1908.02510v1-abstract-full" style="display: none;"> A novel adaptive filtering method called $q$-Volterra least mean square ($q$-VLMS) is presented in this paper. The $q$-VLMS is a nonlinear extension of conventional LMS and it is based on Jackson's derivative also known as $q$-calculus. In Volterra LMS, due to large variance of input signal the convergence speed is very low. With proper manipulation we successfully improved the convergence performance of the Volterra LMS. The proposed algorithm is analyzed for the step-size bounds and results of analysis are verified through computer simulations for nonlinear channel estimation problem. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1908.02510v1-abstract-full').style.display = 'none'; document.getElementById('1908.02510v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 August, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1905.03828">arXiv:1905.03828</a> <span> [<a href="https://arxiv.org/pdf/1905.03828">pdf</a>, <a href="https://arxiv.org/format/1905.03828">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Universal Adversarial Perturbations for Speech Recognition Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Neekhara%2C+P">Paarth Neekhara</a>, <a href="/search/eess?searchtype=author&query=Hussain%2C+S">Shehzeen Hussain</a>, <a href="/search/eess?searchtype=author&query=Pandey%2C+P">Prakhar Pandey</a>, <a href="/search/eess?searchtype=author&query=Dubnov%2C+S">Shlomo Dubnov</a>, <a href="/search/eess?searchtype=author&query=McAuley%2C+J">Julian McAuley</a>, <a href="/search/eess?searchtype=author&query=Koushanfar%2C+F">Farinaz Koushanfar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1905.03828v2-abstract-short" style="display: inline;"> In this work, we demonstrate the existence of universal adversarial audio perturbations that cause mis-transcription of audio signals by automatic speech recognition (ASR) systems. We propose an algorithm to find a single quasi-imperceptible perturbation, which when added to any arbitrary speech signal, will most likely fool the victim speech recognition model. Our experiments demonstrate the appl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1905.03828v2-abstract-full').style.display = 'inline'; document.getElementById('1905.03828v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1905.03828v2-abstract-full" style="display: none;"> In this work, we demonstrate the existence of universal adversarial audio perturbations that cause mis-transcription of audio signals by automatic speech recognition (ASR) systems. We propose an algorithm to find a single quasi-imperceptible perturbation, which when added to any arbitrary speech signal, will most likely fool the victim speech recognition model. Our experiments demonstrate the application of our proposed technique by crafting audio-agnostic universal perturbations for the state-of-the-art ASR system -- Mozilla DeepSpeech. Additionally, we show that such perturbations generalize to a significant extent across models that are not available during training, by performing a transferability test on a WaveNet based ASR system. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1905.03828v2-abstract-full').style.display = 'none'; document.getElementById('1905.03828v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 August, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 May, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published as a conference paper at INTERSPEECH 2019</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1812.00149">arXiv:1812.00149</a> <span> [<a href="https://arxiv.org/pdf/1812.00149">pdf</a>, <a href="https://arxiv.org/format/1812.00149">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> SwishNet: A Fast Convolutional Neural Network for Speech, Music and Noise Classification and Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hussain%2C+M+S">Md. Shamim Hussain</a>, <a href="/search/eess?searchtype=author&query=Haque%2C+M+A">Mohammad Ariful Haque</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1812.00149v1-abstract-short" style="display: inline;"> Speech, Music and Noise classification/segmentation is an important preprocessing step for audio processing/indexing. To this end, we propose a novel 1D Convolutional Neural Network (CNN) - SwishNet. It is a fast and lightweight architecture that operates on MFCC features which is suitable to be added to the front-end of an audio processing pipeline. We showed that the performance of our network c… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1812.00149v1-abstract-full').style.display = 'inline'; document.getElementById('1812.00149v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1812.00149v1-abstract-full" style="display: none;"> Speech, Music and Noise classification/segmentation is an important preprocessing step for audio processing/indexing. To this end, we propose a novel 1D Convolutional Neural Network (CNN) - SwishNet. It is a fast and lightweight architecture that operates on MFCC features which is suitable to be added to the front-end of an audio processing pipeline. We showed that the performance of our network can be improved by distilling knowledge from a 2D CNN, pretrained on ImageNet. We investigated the performance of our network on the MUSAN corpus - an openly available comprehensive collection of noise, music and speech samples, suitable for deep learning. The proposed network achieved high overall accuracy in clip (length of 0.5-2s) classification (>97% accuracy) and frame-wise segmentation (>93% accuracy) tasks with even higher accuracy (>99%) in speech/non-speech discrimination task. To verify the robustness of our model, we trained it on MUSAN and evaluated it on a different corpus - GTZAN and found good accuracy with very little fine-tuning. We also demonstrated that our model is fast on both CPU and GPU, consumes a low amount of memory and is suitable for implementation in embedded systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1812.00149v1-abstract-full').style.display = 'none'; document.getElementById('1812.00149v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 December, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 3 figures, 6 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1302.0017">arXiv:1302.0017</a> <span> [<a href="https://arxiv.org/pdf/1302.0017">pdf</a>, <a href="https://arxiv.org/ps/1302.0017">ps</a>, <a href="https://arxiv.org/format/1302.0017">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Control of Scalar Plants in the Presence of Unmodeled Dynamics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hussain%2C+H+S">Heather S. Hussain</a>, <a href="/search/eess?searchtype=author&query=Matsutani%2C+M+M">Megumi M. Matsutani</a>, <a href="/search/eess?searchtype=author&query=Annaswamy%2C+A+M">Anuradha M. Annaswamy</a>, <a href="/search/eess?searchtype=author&query=Lavretsky%2C+E">Eugene Lavretsky</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1302.0017v1-abstract-short" style="display: inline;"> Robust adaptive control of scalar plants in the presence of unmodeled dynamics is established in this paper. It is shown that implementation of a projection algorithm with standard adaptive control of a scalar plant ensures global boundedness of the overall adaptive system for a class of unmodeled dynamics. </span> <span class="abstract-full has-text-grey-dark mathjax" id="1302.0017v1-abstract-full" style="display: none;"> Robust adaptive control of scalar plants in the presence of unmodeled dynamics is established in this paper. It is shown that implementation of a projection algorithm with standard adaptive control of a scalar plant ensures global boundedness of the overall adaptive system for a class of unmodeled dynamics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1302.0017v1-abstract-full').style.display = 'none'; document.getElementById('1302.0017v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2013; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2013. </p> </li> </ol> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository