<!-- CINXE.COM -->
<!-- Sound Mar 2022 -->
<!DOCTYPE html> <html lang="en"> <head> <title>Sound Mar 2022</title> <meta name="viewport" content="width=device-width, initial-scale=1"> <link rel="apple-touch-icon" sizes="180x180" href="/static/browse/0.3.4/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="/static/browse/0.3.4/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="/static/browse/0.3.4/images/icons/favicon-16x16.png"> <link rel="manifest" href="/static/browse/0.3.4/images/icons/site.webmanifest"> <link rel="mask-icon" href="/static/browse/0.3.4/images/icons/safari-pinned-tab.svg" color="#5bbad5"> <meta name="msapplication-TileColor" content="#da532c"> <meta name="theme-color" content="#ffffff"> <link rel="stylesheet" type="text/css" media="screen" href="/static/browse/0.3.4/css/arXiv.css?v=20241206" /> <link rel="stylesheet" type="text/css" media="print" href="/static/browse/0.3.4/css/arXiv-print.css?v=20200611" /> <link rel="stylesheet" type="text/css" media="screen" href="/static/browse/0.3.4/css/browse_search.css" /> <script language="javascript" src="/static/browse/0.3.4/js/accordion.js" /></script> <script src="/static/browse/0.3.4/js/mathjaxToggle.min.js" type="text/javascript"></script> <script type="text/javascript" language="javascript">mathjaxToggle();</script> </head> <body class="with-cu-identity"> <div class="flex-wrap-footer"> <header> <a href="#content" class="is-sr-only">Skip to main content</a> <!-- start desktop header --> <div class="columns is-vcentered is-hidden-mobile" id="cu-identity"> <div class="column" id="cu-logo"> <a href="https://www.cornell.edu/"><img src="/static/browse/0.3.4/images/icons/cu/cornell-reduced-white-SMALL.svg" alt="Cornell University" /></a> </div><div class="column" id="support-ack"> <span id="support-ack-url">We gratefully acknowledge support from the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors.</span> 
<a href="https://info.arxiv.org/about/donate.html" class="btn-header-donate">Donate</a> </div> </div> <div id="header" class="is-hidden-mobile"> <a aria-hidden="true" tabindex="-1" href="/IgnoreMe"></a> <div class="header-breadcrumbs"> <a href="/"><img src="/static/browse/0.3.4/images/arxiv-logo-one-color-white.svg" alt="arxiv logo" style="height:40px;"/></a> <span>></span> <a href="/list/cs.SD/recent">cs.SD</a> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div><!-- /end desktop header --> <div class="mobile-header"> <div class="columns is-mobile"> <div class="column logo-arxiv"><a href="https://arxiv.org/"><img 
src="/static/browse/0.3.4/images/arxiv-logomark-small-white.svg" alt="arXiv logo" style="height:60px;" /></a></div> <div class="column logo-cornell"><a href="https://www.cornell.edu/"> <picture> <source media="(min-width: 501px)" srcset="/static/browse/0.3.4/images/icons/cu/cornell-reduced-white-SMALL.svg 400w" sizes="400w" /> <source srcset="/static/browse/0.3.4/images/icons/cu/cornell_seal_simple_black.svg 2x" /> <img src="/static/browse/0.3.4/images/icons/cu/cornell-reduced-white-SMALL.svg" alt="Cornell University Logo" /> </picture> </a></div> <div class="column nav" id="toggle-container" role="menubar"> <button class="toggle-control"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-white"><title>open search</title><path d="M505 442.7L405.3 343c-4.5-4.5-10.6-7-17-7H372c27.6-35.3 44-79.7 44-128C416 93.1 322.9 0 208 0S0 93.1 0 208s93.1 208 208 208c48.3 0 92.7-16.4 128-44v16.3c0 6.4 2.5 12.5 7 17l99.7 99.7c9.4 9.4 24.6 9.4 33.9 0l28.3-28.3c9.4-9.4 9.4-24.6.1-34zM208 336c-70.7 0-128-57.2-128-128 0-70.7 57.2-128 128-128 70.7 0 128 57.2 128 128 0 70.7-57.2 128-128 128z"/></svg></button> <div class="mobile-toggle-block toggle-target"> <form class="mobile-search-form" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <input class="input" type="text" name="query" placeholder="Search..." 
aria-label="Search term or terms" /> <input type="hidden" name="source" value="header"> <input type="hidden" name="searchtype" value="all"> <button class="button">GO</button> </div> </form> </div> <button class="toggle-control"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-white" role="menu"><title>open navigation menu</title><path d="M16 132h416c8.837 0 16-7.163 16-16V76c0-8.837-7.163-16-16-16H16C7.163 60 0 67.163 0 76v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16z"/ ></svg></button> <div class="mobile-toggle-block toggle-target"> <nav class="mobile-menu" aria-labelledby="mobilemenulabel"> <h2 id="mobilemenulabel">quick links</h2> <ul> <li><a href="https://arxiv.org/login">Login</a></li> <li><a href="https://info.arxiv.org/help">Help Pages</a></li> <li><a href="https://info.arxiv.org/about">About</a></li> </ul> </nav> </div> </div> </div> </div><!-- /end mobile-header --> </header> <main> <div id="content"> <div id='content-inner'> <div id='dlpage'> <h1>Sound</h1> <h2>Authors and titles for March 2022 </h2> <div class='paging'>Total of 322 entries : <span>1-50</span> <a href=/list/cs.SD/2022-03?skip=50&show=50>51-100</a> <a href=/list/cs.SD/2022-03?skip=100&show=50>101-150</a> <a href=/list/cs.SD/2022-03?skip=150&show=50>151-200</a> <span>...</span> <a href=/list/cs.SD/2022-03?skip=300&show=50>301-322</a> </div> <div class='morefewer'>Showing up to 50 entries per page: <a href=/list/cs.SD/2022-03?skip=0&show=25 rel="nofollow"> fewer</a> | <a href=/list/cs.SD/2022-03?skip=0&show=100 rel="nofollow"> more</a> | <a href=/list/cs.SD/2022-03?skip=0&show=2000 rel="nofollow"> all</a> </div> <dl id='articles'> <dt> <a name='item1'>[1]</a> <a href ="/abs/2203.00232" title="Abstract" id="2203.00232"> arXiv:2203.00232 </a> [<a 
href="/pdf/2203.00232" title="Download PDF" id="pdf-2203.00232" aria-labelledby="pdf-2203.00232">pdf</a>, <a href="/format/2203.00232" title="Other formats" id="oth-2203.00232" aria-labelledby="oth-2203.00232">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Extended Graph Temporal Classification for Multi-Speaker End-to-End ASR </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Chang,+X">Xuankai Chang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Moritz,+N">Niko Moritz</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hori,+T">Takaaki Hori</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Watanabe,+S">Shinji Watanabe</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Roux,+J+L">Jonathan Le Roux</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> To appear in ICASSP2022 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Computation and Language (cs.CL); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item2'>[2]</a> <a href ="/abs/2203.00472" title="Abstract" id="2203.00472"> arXiv:2203.00472 </a> [<a href="/pdf/2203.00472" title="Download PDF" id="pdf-2203.00472" aria-labelledby="pdf-2203.00472">pdf</a>, <a href="/format/2203.00472" title="Other formats" id="oth-2203.00472" aria-labelledby="oth-2203.00472">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> DMF-Net: A decoupling-style multi-band fusion model for full-band speech enhancement </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+G">Guochen Yu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Guan,+Y">Yuansheng Guan</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Meng,+W">Weixin Meng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zheng,+C">Chengshi Zheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+H">Hui Wang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item3'>[3]</a> <a href ="/abs/2203.00513" title="Abstract" id="2203.00513"> arXiv:2203.00513 </a> [<a href="/pdf/2203.00513" title="Download PDF" id="pdf-2203.00513" aria-labelledby="pdf-2203.00513">pdf</a>, <a href="/format/2203.00513" title="Other formats" id="oth-2203.00513" aria-labelledby="oth-2203.00513">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A comparative study of several parameterizations for speaker recognition </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Faundez-Zanuy,+M">Marcos Faundez-Zanuy</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 4 pages </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> 2000 10th European Signal Processing Conference, 2000 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Machine Learning (cs.LG); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item4'>[4]</a> <a href ="/abs/2203.00725" title="Abstract" id="2203.00725"> arXiv:2203.00725 </a> [<a href="/pdf/2203.00725" title="Download PDF" id="pdf-2203.00725" aria-labelledby="pdf-2203.00725">pdf</a>, <a href="/format/2203.00725" title="Other formats" id="oth-2203.00725" aria-labelledby="oth-2203.00725">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Conformer Based Acoustic 
Model for Robust Automatic Speech Recognition </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+Y">Yufeng Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+P">Peidong Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+D">DeLiang Wang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 5 pages, 2 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI); Computation and Language (cs.CL); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item5'>[5]</a> <a href ="/abs/2203.00951" title="Abstract" id="2203.00951"> arXiv:2203.00951 </a> [<a href="/pdf/2203.00951" title="Download PDF" id="pdf-2203.00951" aria-labelledby="pdf-2203.00951">pdf</a>, <a href="/format/2203.00951" title="Other formats" id="oth-2203.00951" aria-labelledby="oth-2203.00951">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Speaker Adaption with Intuitive Prosodic Features for Statistical Parametric Speech Synthesis </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Cheng,+P">Pengyu Cheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ling,+Z">Zhenhua Ling</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted by ICDSP2022 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item6'>[6]</a> <a href ="/abs/2203.01080" title="Abstract" id="2203.01080"> arXiv:2203.01080 </a> [<a href="/pdf/2203.01080" title="Download PDF" id="pdf-2203.01080" aria-labelledby="pdf-2203.01080">pdf</a>, <a 
href="/format/2203.01080" title="Other formats" id="oth-2203.01080" aria-labelledby="oth-2203.01080">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Multi-Scale Time-Frequency Spectrogram Discriminator for GAN-based Non-Autoregressive TTS </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Guo,+H">Haohan Guo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lu,+H">Hui Lu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+X">Xixin Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Meng,+H">Helen Meng</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Submitted to INTERSPEECH 2022 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item7'>[7]</a> <a href ="/abs/2203.01118" title="Abstract" id="2203.01118"> arXiv:2203.01118 </a> [<a href="/pdf/2203.01118" title="Download PDF" id="pdf-2203.01118" aria-labelledby="pdf-2203.01118">pdf</a>, <a href="/format/2203.01118" title="Other formats" id="oth-2203.01118" aria-labelledby="oth-2203.01118">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A multi-task learning for cavitation detection and cavitation intensity recognition of valve acoustic signals </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Sha,+Y">Yu Sha</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Faber,+J">Johannes Faber</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gou,+S">Shuiping Gou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+B">Bo Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+W">Wei Li</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Schramm,+S">Stefan Schramm</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Stoecker,+H">Horst Stoecker</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Steckenreiter,+T">Thomas Steckenreiter</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Vnucec,+D">Domagoj Vnucec</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wetzstein,+N">Nadine Wetzstein</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Widl,+A">Andreas Widl</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+K">Kai Zhou</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> arXiv admin note: text overlap with <a href="https://arxiv.org/abs/2202.13226" data-arxiv-id="2202.13226" class="link-https">arXiv:2202.13226</a> </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Engineering Applications of Artificial Intelligence, 113 (2022), 104904 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item8'>[8]</a> <a href ="/abs/2203.01164" title="Abstract" id="2203.01164"> arXiv:2203.01164 </a> [<a href="/pdf/2203.01164" title="Download PDF" id="pdf-2203.01164" aria-labelledby="pdf-2203.01164">pdf</a>, <a href="/format/2203.01164" title="Other formats" id="oth-2203.01164" aria-labelledby="oth-2203.01164">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Speaker recognition improvement using blind inversion of distortions </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Faundez-Zanuy,+M">Marcos Faundez-Zanuy</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sole-Casals,+J">Jordi Sole-Casals</a></div> <div class='list-comments 
mathjax'><span class='descriptor'>Comments:</span> 4 pages </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> EUSIPCO 2004, Vienna </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Machine Learning (cs.LG); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item9'>[9]</a> <a href ="/abs/2203.01205" title="Abstract" id="2203.01205"> arXiv:2203.01205 </a> [<a href="/pdf/2203.01205" title="Download PDF" id="pdf-2203.01205" aria-labelledby="pdf-2203.01205">pdf</a>, <a href="/format/2203.01205" title="Other formats" id="oth-2203.01205" aria-labelledby="oth-2203.01205">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Audio Self-supervised Learning: A Survey </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+S">Shuo Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mallol-Ragolta,+A">Adria Mallol-Ragolta</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Parada-Cabeleiro,+E">Emilia Parada-Cabeleiro</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qian,+K">Kun Qian</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jing,+X">Xin Jing</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kathan,+A">Alexander Kathan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+B">Bin Hu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Schuller,+B+W">Bjoern W. 
Schuller</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item10'>[10]</a> <a href ="/abs/2203.01429" title="Abstract" id="2203.01429"> arXiv:2203.01429 </a> [<a href="/pdf/2203.01429" title="Download PDF" id="pdf-2203.01429" aria-labelledby="pdf-2203.01429">pdf</a>, <a href="/format/2203.01429" title="Other formats" id="oth-2203.01429" aria-labelledby="oth-2203.01429">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> SMTNet: Hierarchical cavitation intensity recognition based on sub-main transfer network </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Sha,+Y">Yu Sha</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Faber,+J">Johannes Faber</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gou,+S">Shuiping Gou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+B">Bo Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+W">Wei Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Schramm,+S">Stefan Schramm</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Stoecker,+H">Horst Stoecker</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Steckenreiter,+T">Thomas Steckenreiter</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Vnucec,+D">Domagoj Vnucec</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wetzstein,+N">Nadine Wetzstein</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Widl,+A">Andreas Widl</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+K">Kai Zhou</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> we need update this paper </div> <div 
class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item11'>[11]</a> <a href ="/abs/2203.01786" title="Abstract" id="2203.01786"> arXiv:2203.01786 </a> [<a href="/pdf/2203.01786" title="Download PDF" id="pdf-2203.01786" aria-labelledby="pdf-2203.01786">pdf</a>, <a href="/format/2203.01786" title="Other formats" id="oth-2203.01786" aria-labelledby="oth-2203.01786">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Generative Modeling for Low Dimensional Speech Attributes with Neural Spline Flows </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Shih,+K+J">Kevin J. Shih</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Valle,+R">Rafael Valle</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Badlani,+R">Rohan Badlani</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Santos,+J+F">João Felipe Santos</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Catanzaro,+B">Bryan Catanzaro</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 22 pages, 11 figures, 3 tables </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Machine Learning (cs.LG); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item12'>[12]</a> <a href ="/abs/2203.02020" title="Abstract" id="2203.02020"> arXiv:2203.02020 </a> [<a href="/pdf/2203.02020" title="Download PDF" id="pdf-2203.02020" aria-labelledby="pdf-2203.02020">pdf</a>, <a href="/format/2203.02020" title="Other formats" id="oth-2203.02020" aria-labelledby="oth-2203.02020">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Nonlinear predictive models
computation in ADPCM schemes </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Faundez-Zanuy,+M">Marcos Faundez-Zanuy</a></div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> 2000 10th European Signal Processing Conference, 2000, pp. 1-4 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Machine Learning (cs.LG); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item13'>[13]</a> <a href ="/abs/2203.02216" title="Abstract" id="2203.02216"> arXiv:2203.02216 </a> [<a href="/pdf/2203.02216" title="Download PDF" id="pdf-2203.02216" aria-labelledby="pdf-2203.02216">pdf</a>, <a href="/format/2203.02216" title="Other formats" id="oth-2203.02216" aria-labelledby="oth-2203.02216">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Look&amp;Listen: Multi-Modal Correlation Learning for Active Speaker Detection and Speech Enhancement </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Xiong,+J">Junwen Xiong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+Y">Yu Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+P">Peng Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xie,+L">Lei Xie</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+W">Wei Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zha,+Y">Yufei Zha</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 13 pages, 8 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI); Multimedia (cs.MM); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item14'>[14]</a> <a href 
="/abs/2203.02395" title="Abstract" id="2203.02395"> arXiv:2203.02395 </a> [<a href="/pdf/2203.02395" title="Download PDF" id="pdf-2203.02395" aria-labelledby="pdf-2203.02395">pdf</a>, <a href="/format/2203.02395" title="Other formats" id="oth-2203.02395" aria-labelledby="oth-2203.02395">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> iSTFTNet: Fast and Lightweight Mel-Spectrogram Vocoder Incorporating Inverse Short-Time Fourier Transform </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Kaneko,+T">Takuhiro Kaneko</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tanaka,+K">Kou Tanaka</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kameoka,+H">Hirokazu Kameoka</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Seki,+S">Shogo Seki</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted to ICASSP 2022. 
Project page: <a href="https://www.kecl.ntt.co.jp/people/kaneko.takuhiro/projects/istftnet/" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Machine Learning (cs.LG); Audio and Speech Processing (eess.AS); Machine Learning (stat.ML) </div> </div> </dd> <dt> <a name='item15'>[15]</a> <a href ="/abs/2203.02483" title="Abstract" id="2203.02483"> arXiv:2203.02483 </a> [<a href="/pdf/2203.02483" title="Download PDF" id="pdf-2203.02483" aria-labelledby="pdf-2203.02483">pdf</a>, <a href="/format/2203.02483" title="Other formats" id="oth-2203.02483" aria-labelledby="oth-2203.02483">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Ontological Learning from Weak Labels </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Tang,+L">Larry Tang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chou,+P+H">Po Hao Chou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zheng,+Y+Y">Yi Yu Zheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ge,+Z">Ziqian Ge</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shah,+A">Ankit Shah</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Raj,+B">Bhiksha Raj</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Machine Learning (cs.LG); Multimedia (cs.MM); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item16'>[16]</a> <a href ="/abs/2203.02655" title="Abstract" id="2203.02655"> arXiv:2203.02655 </a> [<a href="/pdf/2203.02655" title="Download PDF" id="pdf-2203.02655" aria-labelledby="pdf-2203.02655">pdf</a>, <a href="/format/2203.02655" title="Other formats" id="oth-2203.02655" 
aria-labelledby="oth-2203.02655">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Audio-visual speech separation based on joint feature representation with cross-modal attention </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Xiong,+J">Junwen Xiong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+P">Peng Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xie,+L">Lei Xie</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+W">Wei Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zha,+Y">Yufei Zha</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Y">Yanning Zhang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 5 pages, 3 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Computer Vision and Pattern Recognition (cs.CV); Machine Learning (cs.LG); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item17'>[17]</a> <a href ="/abs/2203.02678" title="Abstract" id="2203.02678"> arXiv:2203.02678 </a> [<a href="/pdf/2203.02678" title="Download PDF" id="pdf-2203.02678" aria-labelledby="pdf-2203.02678">pdf</a>, <a href="/format/2203.02678" title="Other formats" id="oth-2203.02678" aria-labelledby="oth-2203.02678">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> NeuralDPS: Neural Deterministic Plus Stochastic Model with Multiband Excitation for Noise-Controllable Waveform Generation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+T">Tao Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fu,+R">Ruibo Fu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yi,+J">Jiangyan 
Yi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tao,+J">Jianhua Tao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wen,+Z">Zhengqi Wen</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 15 pages, 12 figures; Accepted to TASLP. Demo page <a href="https://hairuo55.github.io/NeuralDPS" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. arXiv admin note: text overlap with <a href="https://arxiv.org/abs/1906.09573" data-arxiv-id="1906.09573" class="link-https">arXiv:1906.09573</a> by other authors </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Computation and Language (cs.CL); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item18'>[18]</a> <a href ="/abs/2203.02941" title="Abstract" id="2203.02941"> arXiv:2203.02941 </a> [<a href="/pdf/2203.02941" title="Download PDF" id="pdf-2203.02941" aria-labelledby="pdf-2203.02941">pdf</a>, <a href="/format/2203.02941" title="Other formats" id="oth-2203.02941" aria-labelledby="oth-2203.02941">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Single microphone speaker extraction using unified time-frequency Siamese-Unet </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Eisenberg,+A">Aviad Eisenberg</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gannot,+S">Sharon Gannot</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chazan,+S+E">Shlomo E. 
Chazan</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item19'>[19]</a> <a href ="/abs/2203.02942" title="Abstract" id="2203.02942"> arXiv:2203.02942 </a> [<a href="/pdf/2203.02942" title="Download PDF" id="pdf-2203.02942" aria-labelledby="pdf-2203.02942">pdf</a>, <a href="/format/2203.02942" title="Other formats" id="oth-2203.02942" aria-labelledby="oth-2203.02942">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> C-P Map: A Novel Evaluation Toolkit for Speaker Verification </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+L">Lantian Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+D">Di Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Du,+W">Wenqiang Du</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+D">Dong Wang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Submitted to Odyssey 2022 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item20'>[20]</a> <a href ="/abs/2203.02944" title="Abstract" id="2203.02944"> arXiv:2203.02944 </a> [<a href="/pdf/2203.02944" title="Download PDF" id="pdf-2203.02944" aria-labelledby="pdf-2203.02944">pdf</a>, <a href="/format/2203.02944" title="Other formats" id="oth-2203.02944" aria-labelledby="oth-2203.02944">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> CNN self-attention voice activity detector </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Sofer,+A">Amit Sofer</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Chazan,+S+E">Shlomo E. Chazan</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item21'>[21]</a> <a href ="/abs/2203.02967" title="Abstract" id="2203.02967"> arXiv:2203.02967 </a> [<a href="/pdf/2203.02967" title="Download PDF" id="pdf-2203.02967" aria-labelledby="pdf-2203.02967">pdf</a>, <a href="/format/2203.02967" title="Other formats" id="oth-2203.02967" aria-labelledby="oth-2203.02967">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Variational Auto-Encoder based Mandarin Speech Cloning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Xing,+Q">Qingyu Xing</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ma,+X">Xiaohan Ma</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Submitted to Insterspeech 2022 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item22'>[22]</a> <a href ="/abs/2203.03022" title="Abstract" id="2203.03022"> arXiv:2203.03022 </a> [<a href="/pdf/2203.03022" title="Download PDF" id="pdf-2203.03022" aria-labelledby="pdf-2203.03022">pdf</a>, <a href="/format/2203.03022" title="Other formats" id="oth-2203.03022" aria-labelledby="oth-2203.03022">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> HEAR: Holistic Evaluation of Audio Representations </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Turian,+J">Joseph Turian</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shier,+J">Jordie Shier</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Khan,+H+R">Humair Raj Khan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Raj,+B">Bhiksha Raj</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Schuller,+B+W">Björn W. Schuller</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Steinmetz,+C+J">Christian J. Steinmetz</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Malloy,+C">Colin Malloy</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tzanetakis,+G">George Tzanetakis</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Velarde,+G">Gissel Velarde</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=McNally,+K">Kirk McNally</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Henry,+M">Max Henry</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pinto,+N">Nicolas Pinto</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Noufi,+C">Camille Noufi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Clough,+C">Christian Clough</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Herremans,+D">Dorien Herremans</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fonseca,+E">Eduardo Fonseca</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Engel,+J">Jesse Engel</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Salamon,+J">Justin Salamon</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Esling,+P">Philippe Esling</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Manocha,+P">Pranay Manocha</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Watanabe,+S">Shinji Watanabe</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jin,+Z">Zeyu Jin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bisk,+Y">Yonatan Bisk</a></div> <div class='list-comments mathjax'><span 
class='descriptor'>Comments:</span> to appear in Proceedings of Machine Learning Research (PMLR): NeurIPS 2021 Competition Track </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG); Audio and Speech Processing (eess.AS); Machine Learning (stat.ML) </div> </div> </dd> <dt> <a name='item23'>[23]</a> <a href ="/abs/2203.03190" title="Abstract" id="2203.03190"> arXiv:2203.03190 </a> [<a href="/pdf/2203.03190" title="Download PDF" id="pdf-2203.03190" aria-labelledby="pdf-2203.03190">pdf</a>, <a href="/format/2203.03190" title="Other formats" id="oth-2203.03190" aria-labelledby="oth-2203.03190">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Speaker recognition by means of a combination of linear and nonlinear predictive models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Faundez-Zanuy,+M">Marcos Faundez-Zanuy</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 4 pages </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> 6th European Conference on EUROSPEEECH 1999 Budapest, Hungary, September 5-9, 1999 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Machine Learning (cs.LG); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item24'>[24]</a> <a href ="/abs/2203.03428" title="Abstract" id="2203.03428"> arXiv:2203.03428 </a> [<a href="/pdf/2203.03428" title="Download PDF" id="pdf-2203.03428" aria-labelledby="pdf-2203.03428">pdf</a>, <a href="/format/2203.03428" title="Other formats" id="oth-2203.03428" aria-labelledby="oth-2203.03428">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Attention-based Region 
of Interest (ROI) Detection for Speech Emotion Recognition </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Desai,+J">Jay Desai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cao,+H">Houwei Cao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shah,+R">Ravi Shah</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Paper written in 2019 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Machine Learning (cs.LG); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item25'>[25]</a> <a href ="/abs/2203.03706" title="Abstract" id="2203.03706"> arXiv:2203.03706 </a> [<a href="/pdf/2203.03706" title="Download PDF" id="pdf-2203.03706" aria-labelledby="pdf-2203.03706">pdf</a>, <a href="/format/2203.03706" title="Other formats" id="oth-2203.03706" aria-labelledby="oth-2203.03706">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Detection of AI Synthesized Hindi Speech </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Bhatia,+K">Karan Bhatia</a> (1), <a href="https://arxiv.org/search/cs?searchtype=author&query=Agrawal,+A">Ansh Agrawal</a> (1), <a href="https://arxiv.org/search/cs?searchtype=author&query=Singh,+P">Priyanka Singh</a> (1), <a href="https://arxiv.org/search/cs?searchtype=author&query=Singh,+A+K">Arun Kumar Singh</a> (2) ((1) Dhirubhai Ambani Institute of Information and Communication Technology, (2) Indian Institute of Technology Jammu)</div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 5 Pages, 6 Figures, 4 Tables </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Machine Learning (cs.LG); Audio and Speech Processing (eess.AS) </div> </div> 
</dd> <dt> <a name='item26'>[26]</a> <a href ="/abs/2203.03812" title="Abstract" id="2203.03812"> arXiv:2203.03812 </a> [<a href="/pdf/2203.03812" title="Download PDF" id="pdf-2203.03812" aria-labelledby="pdf-2203.03812">pdf</a>, <a href="/format/2203.03812" title="Other formats" id="oth-2203.03812" aria-labelledby="oth-2203.03812">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> SpeechFormer: A Hierarchical Efficient Framework Incorporating the Characteristics of Speech </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+W">Weidong Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xing,+X">Xiaofen Xing</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+X">Xiangmin Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pang,+J">Jianxin Pang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Du,+L">Lan Du</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 5 pages, 4figures. 
This paper was submitted to Insterspeech 2022 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item27'>[27]</a> <a href ="/abs/2203.03932" title="Abstract" id="2203.03932"> arXiv:2203.03932 </a> [<a href="/pdf/2203.03932" title="Download PDF" id="pdf-2203.03932" aria-labelledby="pdf-2203.03932">pdf</a>, <a href="/format/2203.03932" title="Other formats" id="oth-2203.03932" aria-labelledby="oth-2203.03932">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Digital Speech Algorithms for Speaker De-Identification </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Marinozzi,+S">Stefano Marinozzi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Faundez-Zanuy,+M">Marcos Faundez-Zanuy</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 4 pages </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> 2014 5th IEEE Conference on Cognitive Infocommunications (CogInfoCom), 2014, pp. 
317-320 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Machine Learning (cs.LG); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item28'>[28]</a> <a href ="/abs/2203.04099" title="Abstract" id="2203.04099"> arXiv:2203.04099 </a> [<a href="/pdf/2203.04099" title="Download PDF" id="pdf-2203.04099" aria-labelledby="pdf-2203.04099">pdf</a>, <a href="/format/2203.04099" title="Other formats" id="oth-2203.04099" aria-labelledby="oth-2203.04099">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> VoViT: Low Latency Graph-based Audio-Visual Voice Separation Transformer </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Montesinos,+J+F">Juan F. Montesinos</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kadandale,+V+S">Venkatesh S. Kadandale</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Haro,+G">Gloria Haro</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted to ECCV 2022 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Computer Vision and Pattern Recognition (cs.CV); Machine Learning (cs.LG); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item29'>[29]</a> <a href ="/abs/2203.04638" title="Abstract" id="2203.04638"> arXiv:2203.04638 </a> [<a href="/pdf/2203.04638" title="Download PDF" id="pdf-2203.04638" aria-labelledby="pdf-2203.04638">pdf</a>, <a href="/format/2203.04638" title="Other formats" id="oth-2203.04638" aria-labelledby="oth-2203.04638">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Speaker Identification Experiments Under Gender De-Identification </div> <div class='list-authors'><a 
href="https://arxiv.org/search/cs?searchtype=author&query=Faundez-Zanuy,+M">Marcos Faundez-Zanuy</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sesa-Nogueras,+E">Enric Sesa-Nogueras</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Marinozzi,+S">Stefano Marinozzi</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 5 pages. arXiv admin note: substantial text overlap with <a href="https://arxiv.org/abs/2203.03932" data-arxiv-id="2203.03932" class="link-https">arXiv:2203.03932</a> </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> 2015 International Carnahan Conference on Security Technology (ICCST), 2015, pp. 1-6 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Machine Learning (cs.LG); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item30'>[30]</a> <a href ="/abs/2203.04696" title="Abstract" id="2203.04696"> arXiv:2203.04696 </a> [<a href="/pdf/2203.04696" title="Download PDF" id="pdf-2203.04696" aria-labelledby="pdf-2203.04696">pdf</a>, <a href="/format/2203.04696" title="Other formats" id="oth-2203.04696" aria-labelledby="oth-2203.04696">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Robust Federated Learning Against Adversarial Attacks for Speech Emotion Recognition </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Chang,+Y">Yi Chang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Laridi,+S">Sofiane Laridi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ren,+Z">Zhao Ren</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Palmer,+G">Gregory Palmer</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Schuller,+B+W">Björn W. 
Schuller</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fisichella,+M">Marco Fisichella</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 11 pages, 6 figures, 3 tables </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Machine Learning (cs.LG); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item31'>[31]</a> <a href ="/abs/2203.04880" title="Abstract" id="2203.04880"> arXiv:2203.04880 </a> [<a href="/pdf/2203.04880" title="Download PDF" id="pdf-2203.04880" aria-labelledby="pdf-2203.04880">pdf</a>, <a href="/format/2203.04880" title="Other formats" id="oth-2203.04880" aria-labelledby="oth-2203.04880">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> An Environmental Feature Representation in I-vector Space for Room Verification and Metadata Estimation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Caulley,+D">Desmond Caulley</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item32'>[32]</a> <a href ="/abs/2203.05333" title="Abstract" id="2203.05333"> arXiv:2203.05333 </a> [<a href="/pdf/2203.05333" title="Download PDF" id="pdf-2203.05333" aria-labelledby="pdf-2203.05333">pdf</a>, <a href="/format/2203.05333" title="Other formats" id="oth-2203.05333" aria-labelledby="oth-2203.05333">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> EACELEB: An East Asian Language Speaking Celebrity Dataset for Speaker Recognition </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Caulley,+D">Desmond Caulley</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+Y">Yufeng Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Anderson,+D">David Anderson</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item33'>[33]</a> <a href ="/abs/2203.05642" title="Abstract" id="2203.05642"> arXiv:2203.05642 </a> [<a href="/pdf/2203.05642" title="Download PDF" id="pdf-2203.05642" aria-labelledby="pdf-2203.05642">pdf</a>, <a href="/format/2203.05642" title="Other formats" id="oth-2203.05642" aria-labelledby="oth-2203.05642">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Parameter-Free Attentive Scoring for Speaker Verification </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Pelecanos,+J">Jason Pelecanos</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Q">Quan Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+Y">Yiling Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Moreno,+I+L">Ignacio Lopez Moreno</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Computation and Language (cs.CL); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item34'>[34]</a> <a href ="/abs/2203.05882" title="Abstract" id="2203.05882"> arXiv:2203.05882 </a> [<a href="/pdf/2203.05882" title="Download PDF" id="pdf-2203.05882" aria-labelledby="pdf-2203.05882">pdf</a>, <a href="/format/2203.05882" title="Other formats" id="oth-2203.05882" aria-labelledby="oth-2203.05882">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Improving the transferability of speech separation by meta-learning </div> 
<div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+K">Kuan-Po Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+Y">Yuan-Kuei Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+H">Hung-yi Lee</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item35'>[35]</a> <a href ="/abs/2203.06059" title="Abstract" id="2203.06059"> arXiv:2203.06059 </a> [<a href="/pdf/2203.06059" title="Download PDF" id="pdf-2203.06059" aria-labelledby="pdf-2203.06059">pdf</a>, <a href="/format/2203.06059" title="Other formats" id="oth-2203.06059" aria-labelledby="oth-2203.06059">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Deep Convolutional Neural Network for Roadway Incident Surveillance Using Audio Data </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Islam,+Z">Zubayer Islam</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Abdel-Aty,+M">Mohamed Abdel-Aty</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item36'>[36]</a> <a href ="/abs/2203.06064" title="Abstract" id="2203.06064"> arXiv:2203.06064 </a> [<a href="/pdf/2203.06064" title="Download PDF" id="pdf-2203.06064" aria-labelledby="pdf-2203.06064">pdf</a>, <a href="/format/2203.06064" title="Other formats" id="oth-2203.06064" aria-labelledby="oth-2203.06064">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Climate Change & Computer Audition: A Call to Action and Overview on Audio Intelligence to Help Save the Planet </div> <div 
class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Schuller,+B+W">Björn W. Schuller</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Akman,+A">Alican Akman</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chang,+Y">Yi Chang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Coppock,+H">Harry Coppock</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gebhard,+A">Alexander Gebhard</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kathan,+A">Alexander Kathan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rituerto-Gonz%C3%A1lez,+E">Esther Rituerto-González</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Triantafyllopoulos,+A">Andreas Triantafyllopoulos</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pokorny,+F+B">Florian B. Pokorny</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Machine Learning (cs.LG) </div> </div> </dd> <dt> <a name='item37'>[37]</a> <a href ="/abs/2203.06220" title="Abstract" id="2203.06220"> arXiv:2203.06220 </a> [<a href="/pdf/2203.06220" title="Download PDF" id="pdf-2203.06220" aria-labelledby="pdf-2203.06220">pdf</a>, <a href="/format/2203.06220" title="Other formats" id="oth-2203.06220" aria-labelledby="oth-2203.06220">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Infrastructure-free, Deep Learned Urban Noise Monitoring at $\sim$100mW </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Yun,+J">Jihoon Yun</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sangeeta">Sangeeta Srivastava</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Roy,+D">Dhrubojyoti Roy</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Stohs,+N">Nathan Stohs</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Mydlarz,+C">Charlie Mydlarz</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Salman,+M">Mahin Salman</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Steers,+B">Bea Steers</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bello,+J+P">Juan Pablo Bello</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Arora,+A">Anish Arora</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted in ICCPS 2022 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Networking and Internet Architecture (cs.NI); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item38'>[38]</a> <a href ="/abs/2203.06517" title="Abstract" id="2203.06517"> arXiv:2203.06517 </a> [<a href="/pdf/2203.06517" title="Download PDF" id="pdf-2203.06517" aria-labelledby="pdf-2203.06517">pdf</a>, <a href="/format/2203.06517" title="Other formats" id="oth-2203.06517" aria-labelledby="oth-2203.06517">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> SA-SASV: An End-to-End Spoof-Aggregated Spoofing-Aware Speaker Verification System </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Teng,+Z">Zhongwei Teng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fu,+Q">Quchen Fu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=White,+J">Jules White</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Powell,+M+E">Maria E. Powell</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Schmidt,+D+C">Douglas C. 
Schmidt</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Update Experiment Results in ASV2019 protocol </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item39'>[39]</a> <a href ="/abs/2203.06583" title="Abstract" id="2203.06583"> arXiv:2203.06583 </a> [<a href="/pdf/2203.06583" title="Download PDF" id="pdf-2203.06583" aria-labelledby="pdf-2203.06583">pdf</a>, <a href="/format/2203.06583" title="Other formats" id="oth-2203.06583" aria-labelledby="oth-2203.06583">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Bi-Sampling Approach to Classify Music Mood leveraging Raga-Rasa Association in Indian Classical Music </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=C,+M+R+B">Mohan Rao B C</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Arkachaari,+V">Vinayak Arkachaari</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=N,+H+M">Harsha M N</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=N,+S+M">Sushmitha M N</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=K,+G+R+K">Gayathri Ramesh K K</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=S,+U+M">Ullas M S</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rao,+P+M">Pathi Mohan Rao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=G,+S">Sudha G</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Darapaneni,+N">Narayana Darapaneni</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item40'>[40]</a> <a href ="/abs/2203.06760" 
title="Abstract" id="2203.06760"> arXiv:2203.06760 </a> [<a href="/pdf/2203.06760" title="Download PDF" id="pdf-2203.06760" aria-labelledby="pdf-2203.06760">pdf</a>, <a href="/format/2203.06760" title="Other formats" id="oth-2203.06760" aria-labelledby="oth-2203.06760">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> CMKD: CNN/Transformer-Based Cross-Model Knowledge Distillation for Audio Classification </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Gong,+Y">Yuan Gong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Khurana,+S">Sameer Khurana</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rouditchenko,+A">Andrew Rouditchenko</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Glass,+J">James Glass</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item41'>[41]</a> <a href ="/abs/2203.07179" title="Abstract" id="2203.07179"> arXiv:2203.07179 </a> [<a href="/pdf/2203.07179" title="Download PDF" id="pdf-2203.07179" aria-labelledby="pdf-2203.07179">pdf</a>, <a href="/format/2203.07179" title="Other formats" id="oth-2203.07179" aria-labelledby="oth-2203.07179">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> MDNet: Learning Monaural Speech Enhancement from Deep Prior Gradient </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+A">Andong Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zheng,+C">Chengshi Zheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Z">Ziyang Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+X">Xiaodong Li</a></div> <div 
class='list-comments mathjax'><span class='descriptor'>Comments:</span> Submitted to Interspeech2022 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item42'>[42]</a> <a href ="/abs/2203.07195" title="Abstract" id="2203.07195"> arXiv:2203.07195 </a> [<a href="/pdf/2203.07195" title="Download PDF" id="pdf-2203.07195" aria-labelledby="pdf-2203.07195">pdf</a>, <a href="/format/2203.07195" title="Other formats" id="oth-2203.07195" aria-labelledby="oth-2203.07195">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> TaylorBeamformer: Learning All-Neural Beamformer for Multi-Channel Speech Enhancement from Taylor's Approximation Theory </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+A">Andong Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+G">Guochen Yu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zheng,+C">Chengshi Zheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+X">Xiaodong Li</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Submitted to Interspeech2022 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item43'>[43]</a> <a href ="/abs/2203.07996" title="Abstract" id="2203.07996"> arXiv:2203.07996 </a> [<a href="/pdf/2203.07996" title="Download PDF" id="pdf-2203.07996" aria-labelledby="pdf-2203.07996">pdf</a>, <a href="/format/2203.07996" title="Other formats" id="oth-2203.07996" aria-labelledby="oth-2203.07996">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Leveraging Unimodal Self-Supervised 
Learning for Multimodal Audio-Visual Speech Recognition </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Pan,+X">Xichen Pan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+P">Peiyu Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gong,+Y">Yichen Gong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+H">Helong Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+X">Xinbing Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+Z">Zhouhan Lin</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> ACL2022 Main Conference </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI); Computation and Language (cs.CL); Computer Vision and Pattern Recognition (cs.CV); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item44'>[44]</a> <a href ="/abs/2203.08073" title="Abstract" id="2203.08073"> arXiv:2203.08073 </a> [<a href="/pdf/2203.08073" title="Download PDF" id="pdf-2203.08073" aria-labelledby="pdf-2203.08073">pdf</a>, <a href="/format/2203.08073" title="Other formats" id="oth-2203.08073" aria-labelledby="oth-2203.08073">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Can A Neural Network Hear the Shape of A Drum? </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+Y">Yueqi Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fogler,+M+M">Michael M. 
Fogler</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Machine Learning (cs.LG); Data Analysis, Statistics and Probability (physics.data-an) </div> </div> </dd> <dt> <a name='item45'>[45]</a> <a href ="/abs/2203.08439" title="Abstract" id="2203.08439"> arXiv:2203.08439 </a> [<a href="/pdf/2203.08439" title="Download PDF" id="pdf-2203.08439" aria-labelledby="pdf-2203.08439">pdf</a>, <a href="/format/2203.08439" title="Other formats" id="oth-2203.08439" aria-labelledby="oth-2203.08439">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Instance-level loss based multiple-instance learning framework for acoustic scene classification </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Choi,+W">Won-Gook Choi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chang,+J">Joon-Hyuk Chang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+J">Jae-Mo Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Moon,+H">Han-Gil Moon</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item46'>[46]</a> <a href ="/abs/2203.08490" title="Abstract" id="2203.08490"> arXiv:2203.08490 </a> [<a href="/pdf/2203.08490" title="Download PDF" id="pdf-2203.08490" aria-labelledby="pdf-2203.08490">pdf</a>, <a href="/format/2203.08490" title="Other formats" id="oth-2203.08490" aria-labelledby="oth-2203.08490">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Learning Audio Representations with MLPs </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Morshed,+M+M">Mashrur M. 
Morshed</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ahsan,+A+O">Ahmad Omar Ahsan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mahmud,+H">Hasan Mahmud</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hasan,+M+K">Md. Kamrul Hasan</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> In submission to Proceedings of Machine Learning Research (PMLR): NeurIPS 2021 Competition Track </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Machine Learning (cs.LG); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item47'>[47]</a> <a href ="/abs/2203.09098" title="Abstract" id="2203.09098"> arXiv:2203.09098 </a> [<a href="/pdf/2203.09098" title="Download PDF" id="pdf-2203.09098" aria-labelledby="pdf-2203.09098">pdf</a>, <a href="/format/2203.09098" title="Other formats" id="oth-2203.09098" aria-labelledby="oth-2203.09098">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> TMS: A Temporal Multi-scale Backbone Design for Speaker Embedding </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+R">Ruiteng Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wei,+J">Jianguo Wei</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lu,+X">Xugang Lu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lu,+W">Wenhuan Lu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jin,+D">Di Jin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+J">Junhai Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+L">Lin Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ji,+Y">Yantao Ji</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dang,+J">Jianwu Dang</a></div> <div 
class='list-comments mathjax'><span class='descriptor'>Comments:</span> Due to the limitation "The abstract field cannot be longer than 1,920 characters", the abstract here is shorter than that in the PDF file </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Machine Learning (cs.LG); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item48'>[48]</a> <a href ="/abs/2203.09129" title="Abstract" id="2203.09129"> arXiv:2203.09129 </a> [<a href="/pdf/2203.09129" title="Download PDF" id="pdf-2203.09129" aria-labelledby="pdf-2203.09129">pdf</a>, <a href="/format/2203.09129" title="Other formats" id="oth-2203.09129" aria-labelledby="oth-2203.09129">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Contrastive Learning with Positive-Negative Frame Mask for Music Representation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Yao,+D">Dong Yao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+Z">Zhou Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+S">Shengyu Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+J">Jieming Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+Y">Yudong Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+R">Rui Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=He,+X">Xiuqiang He</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted by WWW2022 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Machine Learning (cs.LG); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item49'>[49]</a> <a href ="/abs/2203.09148" title="Abstract" id="2203.09148"> arXiv:2203.09148 
</a> [<a href="/pdf/2203.09148" title="Download PDF" id="pdf-2203.09148" aria-labelledby="pdf-2203.09148">pdf</a>, <a href="/format/2203.09148" title="Other formats" id="oth-2203.09148" aria-labelledby="oth-2203.09148">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Prediction of speech intelligibility with DNN-based performance measures </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Martinez,+A+M+C">Angel Mario Castro Martinez</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Spille,+C">Constantin Spille</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ro%C3%9Fbach,+J">Jana Roßbach</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kollmeier,+B">Birger Kollmeier</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Meyer,+B+T">Bernd T. Meyer</a></div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Computer Speech & Language, 74, p.101329 (2022) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI); Computation and Language (cs.CL); Machine Learning (cs.LG); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item50'>[50]</a> <a href ="/abs/2203.09231" title="Abstract" id="2203.09231"> arXiv:2203.09231 </a> [<a href="/pdf/2203.09231" title="Download PDF" id="pdf-2203.09231" aria-labelledby="pdf-2203.09231">pdf</a>, <a href="/format/2203.09231" title="Other formats" id="oth-2203.09231" aria-labelledby="oth-2203.09231">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Speaker recognition using residual signal of linear and nonlinear prediction models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Faundez-Zanuy,+M">Marcos Faundez-Zanuy</a>, 
<a href="https://arxiv.org/search/cs?searchtype=author&query=Rodr%C3%ADguez-Porcheron,+D">Daniel Rodríguez-Porcheron</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 4 pages, published in 5th International Conference on spoken language processing. Vol.2 pp.121-124. ICSLP 1998. ISBN 1-876346-17-5 </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> 5th International Conference on spoken language processing. Vol.2 pp.121-124. ICSLP 1998. ISBN 1-876346-17-5 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Audio and Speech Processing (eess.AS) </div> </div> </dd> </dl> <div class='paging'>Total of 322 entries : <span>1-50</span> <a href=/list/cs.SD/2022-03?skip=50&show=50>51-100</a> <a href=/list/cs.SD/2022-03?skip=100&show=50>101-150</a> <a href=/list/cs.SD/2022-03?skip=150&show=50>151-200</a> <span>...</span> <a href=/list/cs.SD/2022-03?skip=300&show=50>301-322</a> </div> <div class='morefewer'>Showing up to 50 entries per page: <a href=/list/cs.SD/2022-03?skip=0&show=25 rel="nofollow"> fewer</a> | <a href=/list/cs.SD/2022-03?skip=0&show=100 rel="nofollow"> more</a> | <a href=/list/cs.SD/2022-03?skip=0&show=2000 rel="nofollow"> all</a> </div> </div> </div> </div> </main> <footer style="clear: both;"> <div class="columns is-desktop" role="navigation" aria-label="Secondary" style="margin: -0.75em -0.75em 0.75em -0.75em"> <!-- Macro-Column 1 --> <div class="column" style="padding: 0;"> <div class="columns"> <div class="column"> <ul style="list-style: none; line-height: 2;"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul style="list-style: none; line-height: 2;"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to 
contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- End Macro-Column 1 --> <!-- Macro-Column 2 --> <div class="column" style="padding: 0;"> <div class="columns"> <div class="column"> <ul style="list-style: none; line-height: 2;"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul style="list-style: none; line-height: 2;"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 
0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 
21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> <!-- End Macro-Column 2 --> </div> </footer> </div> <script src="/static/base/1.0.1/js/member_acknowledgement.js"></script> </body> </html>