<!-- mirror-capture residue (cinxe.com): page title "Sound" — preserved as a comment so no stray text nodes precede the doctype -->
<?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"> <head> <title>Sound </title> <meta name="viewport" content="width=device-width, initial-scale=1"> <link rel="apple-touch-icon" sizes="180x180" href="/static/browse/0.3.4/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="/static/browse/0.3.4/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="/static/browse/0.3.4/images/icons/favicon-16x16.png"> <link rel="manifest" href="/static/browse/0.3.4/images/icons/site.webmanifest"> <link rel="mask-icon" href="/static/browse/0.3.4/images/icons/safari-pinned-tab.svg" color="#5bbad5"> <meta name="msapplication-TileColor" content="#da532c"> <meta name="theme-color" content="#ffffff"> <link rel="stylesheet" type="text/css" media="screen" href="/static/browse/0.3.4/css/arXiv.css?v=20240822" /> <link rel="stylesheet" type="text/css" media="print" href="/static/browse/0.3.4/css/arXiv-print.css?v=20200611" /> <link rel="stylesheet" type="text/css" media="screen" href="/static/browse/0.3.4/css/browse_search.css" /> <script src="/static/browse/0.3.4/js/accordion.js"></script> <script src="/static/browse/0.3.4/js/mathjaxToggle.min.js" type="text/javascript"></script> <script type="text/javascript">mathjaxToggle();</script> </head> <body class="with-cu-identity"> <div class="flex-wrap-footer"> <header> <a href="#content" class="is-sr-only">Skip to main content</a> <!-- start desktop header --> <div class="columns is-vcentered is-hidden-mobile" id="cu-identity"> <div class="column" id="cu-logo"> <a href="https://www.cornell.edu/"><img src="/static/browse/0.3.4/images/icons/cu/cornell-reduced-white-SMALL.svg" alt="Cornell University" /></a> </div><div class="column" id="support-ack"> <span 
id="support-ack-url">We gratefully acknowledge support from the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors.</span> <a href="https://info.arxiv.org/about/donate.html" class="btn-header-donate">Donate</a> </div> </div> <div id="header" class="is-hidden-mobile"> <a aria-hidden="true" tabindex="-1" href="/IgnoreMe"></a> <div class="header-breadcrumbs"> <a href="/"><img src="/static/browse/0.3.4/images/arxiv-logo-one-color-white.svg" alt="arxiv logo" style="height:40px;"/></a> <span>&gt;</span> <a href="/list/cs.SD/recent">cs.SD</a> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> 
</form> </div> </div><!-- /end desktop header --> <div class="mobile-header"> <div class="columns is-mobile"> <div class="column logo-arxiv"><a href="https://arxiv.org/"><img src="/static/browse/0.3.4/images/arxiv-logomark-small-white.svg" alt="arXiv logo" style="height:60px;" /></a></div> <div class="column logo-cornell"><a href="https://www.cornell.edu/"> <picture> <source media="(min-width: 501px)" srcset="/static/browse/0.3.4/images/icons/cu/cornell-reduced-white-SMALL.svg 400w" sizes="400w" /> <source srcset="/static/browse/0.3.4/images/icons/cu/cornell_seal_simple_black.svg 2x" /> <img src="/static/browse/0.3.4/images/icons/cu/cornell-reduced-white-SMALL.svg" alt="Cornell University Logo" /> </picture> </a></div> <div class="column nav" id="toggle-container" role="menubar"> <button class="toggle-control"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-white"><title>open search</title><path d="M505 442.7L405.3 343c-4.5-4.5-10.6-7-17-7H372c27.6-35.3 44-79.7 44-128C416 93.1 322.9 0 208 0S0 93.1 0 208s93.1 208 208 208c48.3 0 92.7-16.4 128-44v16.3c0 6.4 2.5 12.5 7 17l99.7 99.7c9.4 9.4 24.6 9.4 33.9 0l28.3-28.3c9.4-9.4 9.4-24.6.1-34zM208 336c-70.7 0-128-57.2-128-128 0-70.7 57.2-128 128-128 70.7 0 128 57.2 128 128 0 70.7-57.2 128-128 128z"/></svg></button> <div class="mobile-toggle-block toggle-target"> <form class="mobile-search-form" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <input class="input" type="text" name="query" placeholder="Search..." 
aria-label="Search term or terms" /> <input type="hidden" name="source" value="header"> <input type="hidden" name="searchtype" value="all"> <button class="button">GO</button> </div> </form> </div> <button class="toggle-control"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-white" role="menu"><title>open navigation menu</title><path d="M16 132h416c8.837 0 16-7.163 16-16V76c0-8.837-7.163-16-16-16H16C7.163 60 0 67.163 0 76v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16z"/ ></svg></button> <div class="mobile-toggle-block toggle-target"> <nav class="mobile-menu" aria-labelledby="mobilemenulabel"> <h2 id="mobilemenulabel">quick links</h2> <ul> <li><a href="https://arxiv.org/login">Login</a></li> <li><a href="https://info.arxiv.org/help">Help Pages</a></li> <li><a href="https://info.arxiv.org/about">About</a></li> </ul> </nav> </div> </div> </div> </div><!-- /end mobile-header --> </header> <main> <div id="content"> <div id='content-inner'> <div id='dlpage'> <h1>Sound</h1> <h2>Authors and titles for recent submissions</h2> <ul> <li> <a href="/list/cs.SD/recent?skip=0&amp;show=50"> Wed, 27 Nov 2024 </a> </li><li> <a href="/list/cs.SD/recent?skip=5&amp;show=50"> Tue, 26 Nov 2024 </a> </li><li> <a href="/list/cs.SD/recent?skip=11&amp;show=50"> Mon, 25 Nov 2024 </a> </li><li> <a href="/list/cs.SD/recent?skip=22&amp;show=50"> Fri, 22 Nov 2024 </a> </li><li> <a href="/list/cs.SD/recent?skip=28&amp;show=50"> Thu, 21 Nov 2024 </a> </li></ul> <p>See today's <a id="new-cs.SD" aria-labelledby="new-cs.SD" href="/list/cs.SD/new">new</a> changes</p> <div class='paging'>Total of 36 entries </div> <div class='morefewer'>Showing up to 50 entries per page: <a href=/list/cs.SD/recent?skip=0&amp;show=25 rel="nofollow"> fewer</a> | <span style="color: 
#454545">more</span> | <span style="color: #454545">all</span> </div> <dl id='articles'> <h3>Wed, 27 Nov 2024 (showing 5 of 5 entries )</h3> <dt> <a name='item1'>[1]</a> <a href ="/abs/2411.17349" title="Abstract" id="2411.17349"> arXiv:2411.17349 </a> [<a href="/pdf/2411.17349" title="Download PDF" id="pdf-2411.17349" aria-labelledby="pdf-2411.17349">pdf</a>, <a href="https://arxiv.org/html/2411.17349v1" title="View HTML" id="html-2411.17349" aria-labelledby="html-2411.17349" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.17349" title="Other formats" id="oth-2411.17349" aria-labelledby="oth-2411.17349">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Comparative Analysis of ASR Methods for Speech Deepfake Detection </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Salvi,+D">Davide Salvi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Yadav,+A+K+S">Amit Kumar Singh Yadav</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Bhagtani,+K">Kratika Bhagtani</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Negroni,+V">Viola Negroni</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Bestagini,+P">Paolo Bestagini</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Delp,+E+J">Edward J. 
Delp</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Published at Asilomar Conference on Signals, Systems, and Computers 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span> </div> </div> </dd> <dt> <a name='item2'>[2]</a> <a href ="/abs/2411.16729" title="Abstract" id="2411.16729"> arXiv:2411.16729 </a> [<a href="/pdf/2411.16729" title="Download PDF" id="pdf-2411.16729" aria-labelledby="pdf-2411.16729">pdf</a>, <a href="https://arxiv.org/html/2411.16729v1" title="View HTML" id="html-2411.16729" aria-labelledby="html-2411.16729" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.16729" title="Other formats" id="oth-2411.16729" aria-labelledby="oth-2411.16729">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> DiM-Gestor: Co-Speech Gesture Generation with Adaptive Layer Normalization Mamba-2 </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Zhang,+F">Fan Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Zhao,+S">Siyuan Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Ji,+N">Naye Ji</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Wang,+Z">Zhaohan Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Wu,+J">Jingmei Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Gao,+F">Fuxing Gao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Ye,+Z">Zhenqing Ye</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Yan,+L">Leyao Yan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Dai,+L">Lanxin Dai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Geng,+W">Weidong Geng</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&amp;query=Lyu,+X">Xin Lyu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Zhao,+B">Bozuo Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Yu,+D">Dingguo Yu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Du,+H">Hui Du</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Hu,+B">Bin Hu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 13 pages, 11 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI); Graphics (cs.GR); Human-Computer Interaction (cs.HC); Multimedia (cs.MM) </div> </div> </dd> <dt> <a name='item3'>[3]</a> <a href ="/abs/2411.17698" title="Abstract" id="2411.17698"> arXiv:2411.17698 </a> (cross-list from cs.CV) [<a href="/pdf/2411.17698" title="Download PDF" id="pdf-2411.17698" aria-labelledby="pdf-2411.17698">pdf</a>, <a href="https://arxiv.org/html/2411.17698v1" title="View HTML" id="html-2411.17698" aria-labelledby="html-2411.17698" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.17698" title="Other formats" id="oth-2411.17698" aria-labelledby="oth-2411.17698">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Video-Guided Foley Sound Generation with Multimodal Controls </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Chen,+Z">Ziyang Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Seetharaman,+P">Prem Seetharaman</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Russell,+B">Bryan Russell</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Nieto,+O">Oriol Nieto</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Bourgin,+D">David Bourgin</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&amp;query=Owens,+A">Andrew Owens</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Salamon,+J">Justin Salamon</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Project site: <a href="https://ificl.github.io/MultiFoley/" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Multimedia (cs.MM); Sound (cs.SD); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item4'>[4]</a> <a href ="/abs/2411.17690" title="Abstract" id="2411.17690"> arXiv:2411.17690 </a> (cross-list from cs.MM) [<a href="/pdf/2411.17690" title="Download PDF" id="pdf-2411.17690" aria-labelledby="pdf-2411.17690">pdf</a>, <a href="https://arxiv.org/html/2411.17690v1" title="View HTML" id="html-2411.17690" aria-labelledby="html-2411.17690" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.17690" title="Other formats" id="oth-2411.17690" aria-labelledby="oth-2411.17690">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Visatronic: A Multimodal Decoder-Only Model for Speech Synthesis </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Gupta,+A">Akshita Gupta</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Likhomanenko,+T">Tatiana Likhomanenko</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Yang,+K+D">Karren Dai Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Bai,+R+H">Richard He Bai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Aldeneh,+Z">Zakaria Aldeneh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Jaitly,+N">Navdeep Jaitly</a></div> <div 
class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Multimedia (cs.MM)</span>; Computer Vision and Pattern Recognition (cs.CV); Sound (cs.SD); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item5'>[5]</a> <a href ="/abs/2411.17607" title="Abstract" id="2411.17607"> arXiv:2411.17607 </a> (cross-list from cs.CL) [<a href="/pdf/2411.17607" title="Download PDF" id="pdf-2411.17607" aria-labelledby="pdf-2411.17607">pdf</a>, <a href="/format/2411.17607" title="Other formats" id="oth-2411.17607" aria-labelledby="oth-2411.17607">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Scaling Speech-Text Pre-training with Synthetic Interleaved Data </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Zeng,+A">Aohan Zeng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Du,+Z">Zhengxiao Du</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Liu,+M">Mingdao Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Zhang,+L">Lei Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Jiang,+S">Shengmin Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Dong,+Y">Yuxiao Dong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Tang,+J">Jie Tang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Sound (cs.SD); Audio and Speech Processing (eess.AS) </div> </div> </dd> </dl> <dl id='articles'> <h3>Tue, 26 Nov 2024 (showing 6 of 6 entries )</h3> <dt> <a name='item6'>[6]</a> <a href ="/abs/2411.16276" title="Abstract" id="2411.16276"> arXiv:2411.16276 </a> [<a href="/pdf/2411.16276" title="Download PDF" id="pdf-2411.16276" aria-labelledby="pdf-2411.16276">pdf</a>, <a href="https://arxiv.org/html/2411.16276v1" 
title="View HTML" id="html-2411.16276" aria-labelledby="html-2411.16276" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.16276" title="Other formats" id="oth-2411.16276" aria-labelledby="oth-2411.16276">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> The SVASR System for Text-dependent Speaker Verification (TdSV) AAIC Challenge 2024 </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Molavi,+M">Mohammadreza Molavi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Khodadadi,+R">Reza Khodadadi</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item7'>[7]</a> <a href ="/abs/2411.16147" title="Abstract" id="2411.16147"> arXiv:2411.16147 </a> [<a href="/pdf/2411.16147" title="Download PDF" id="pdf-2411.16147" aria-labelledby="pdf-2411.16147">pdf</a>, <a href="https://arxiv.org/html/2411.16147v1" title="View HTML" id="html-2411.16147" aria-labelledby="html-2411.16147" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.16147" title="Other formats" id="oth-2411.16147" aria-labelledby="oth-2411.16147">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> SKQVC: One-Shot Voice Conversion by K-Means Quantization with Self-Supervised Speech Representations </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Sim,+Y">Youngjun Sim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Yoon,+J">Jinsung Yoon</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Suh,+Y">Young-Joo Suh</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 5 pages </div> <div 
class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item8'>[8]</a> <a href ="/abs/2411.15913" title="Abstract" id="2411.15913"> arXiv:2411.15913 </a> [<a href="/pdf/2411.15913" title="Download PDF" id="pdf-2411.15913" aria-labelledby="pdf-2411.15913">pdf</a>, <a href="https://arxiv.org/html/2411.15913v1" title="View HTML" id="html-2411.15913" aria-labelledby="html-2411.15913" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.15913" title="Other formats" id="oth-2411.15913" aria-labelledby="oth-2411.15913">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Training-Free Approach for Music Style Transfer with Latent Diffusion Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Kim,+S">Sooyoung Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Kwon,+J">Joonwoo Kwon</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Wang,+H">Heehwan Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Yoo,+S">Shinjae Yoo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Lin,+Y">Yuewei Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Cha,+J">Jiook Cha</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Codes will be released upon acceptance </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item9'>[9]</a> <a href ="/abs/2411.15457" title="Abstract" id="2411.15457"> arXiv:2411.15457 </a> [<a href="/pdf/2411.15457" title="Download PDF" 
id="pdf-2411.15457" aria-labelledby="pdf-2411.15457">pdf</a>, <a href="https://arxiv.org/html/2411.15457v1" title="View HTML" id="html-2411.15457" aria-labelledby="html-2411.15457" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.15457" title="Other formats" id="oth-2411.15457" aria-labelledby="oth-2411.15457">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Hindi audio-video-Deepfake (HAV-DF): A Hindi language-based Audio-video Deepfake Dataset </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Kaur,+S">Sukhandeep Kaur</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Buhari,+M">Mubashir Buhari</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Khandelwal,+N">Naman Khandelwal</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Tyagi,+P">Priyansh Tyagi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Sharma,+K">Kiran Sharma</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI); Cryptography and Security (cs.CR); Graphics (cs.GR); Multimedia (cs.MM); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item10'>[10]</a> <a href ="/abs/2411.16331" title="Abstract" id="2411.16331"> arXiv:2411.16331 </a> (cross-list from cs.MM) [<a href="/pdf/2411.16331" title="Download PDF" id="pdf-2411.16331" aria-labelledby="pdf-2411.16331">pdf</a>, <a href="https://arxiv.org/html/2411.16331v1" title="View HTML" id="html-2411.16331" aria-labelledby="html-2411.16331" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.16331" title="Other formats" id="oth-2411.16331" aria-labelledby="oth-2411.16331">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Sonic: Shifting 
Focus to Global Audio Perception in Portrait Animation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Ji,+X">Xiaozhong Ji</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Hu,+X">Xiaobin Hu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Xu,+Z">Zhihong Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Zhu,+J">Junwei Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Lin,+C">Chuming Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=He,+Q">Qingdong He</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Zhang,+J">Jiangning Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Luo,+D">Donghao Luo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Chen,+Y">Yi Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Lin,+Q">Qin Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Lu,+Q">Qinglin Lu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Wang,+C">Chengjie Wang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> refer to our main-page \url{<a href="https://jixiaozhong.github.io/Sonic/" rel="external noopener nofollow" class="link-external link-https">this https URL</a>} </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Multimedia (cs.MM)</span>; Computer Vision and Pattern Recognition (cs.CV); Graphics (cs.GR); Sound (cs.SD); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item11'>[11]</a> <a href ="/abs/2411.15447" title="Abstract" id="2411.15447"> arXiv:2411.15447 </a> (cross-list from cs.MM) [<a href="/pdf/2411.15447" title="Download PDF" id="pdf-2411.15447" aria-labelledby="pdf-2411.15447">pdf</a>, <a href="https://arxiv.org/html/2411.15447v2" title="View HTML" 
id="html-2411.15447" aria-labelledby="html-2411.15447" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.15447" title="Other formats" id="oth-2411.15447" aria-labelledby="oth-2411.15447">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Gotta Hear Them All: Sound Source Aware Vision to Audio Generation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Guo,+W">Wei Guo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Wang,+H">Heng Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Ma,+J">Jianbo Ma</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Cai,+W">Weidong Cai</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 16 pages, 9 figures, source code released at <a href="https://github.com/wguo86/SSV2A" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Multimedia (cs.MM)</span>; Computer Vision and Pattern Recognition (cs.CV); Sound (cs.SD); Audio and Speech Processing (eess.AS) </div> </div> </dd> </dl> <dl id='articles'> <h3>Mon, 25 Nov 2024 (showing 11 of 11 entries )</h3> <dt> <a name='item12'>[12]</a> <a href ="/abs/2411.15082" title="Abstract" id="2411.15082"> arXiv:2411.15082 </a> [<a href="/pdf/2411.15082" title="Download PDF" id="pdf-2411.15082" aria-labelledby="pdf-2411.15082">pdf</a>, <a href="https://arxiv.org/html/2411.15082v1" title="View HTML" id="html-2411.15082" aria-labelledby="html-2411.15082" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.15082" title="Other formats" id="oth-2411.15082" aria-labelledby="oth-2411.15082">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Towards Speaker 
Identification with Minimal Dataset and Constrained Resources using 1D-Convolution Neural Network </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Shahan,+I+N">Irfan Nafiz Shahan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Auvi,+P+A">Pulok Ahmed Auvi</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item13'>[13]</a> <a href ="/abs/2411.14907" title="Abstract" id="2411.14907"> arXiv:2411.14907 </a> [<a href="/pdf/2411.14907" title="Download PDF" id="pdf-2411.14907" aria-labelledby="pdf-2411.14907">pdf</a>, <a href="https://arxiv.org/html/2411.14907v1" title="View HTML" id="html-2411.14907" aria-labelledby="html-2411.14907" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14907" title="Other formats" id="oth-2411.14907" aria-labelledby="oth-2411.14907">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> DAIRHuM: A Platform for Directly Aligning AI Representations with Human Musical Judgments applied to Carnatic Music </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Ravikumar,+P+T">Prashanth Thattai Ravikumar</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 4 Pages, ICASSP workshop submission </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item14'>[14]</a> <a href ="/abs/2411.14842" title="Abstract" id="2411.14842"> arXiv:2411.14842 </a> [<a href="/pdf/2411.14842" title="Download PDF" id="pdf-2411.14842" 
aria-labelledby="pdf-2411.14842">pdf</a>, <a href="https://arxiv.org/html/2411.14842v1" title="View HTML" id="html-2411.14842" aria-labelledby="html-2411.14842" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14842" title="Other formats" id="oth-2411.14842" aria-labelledby="oth-2411.14842">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Who Can Withstand Chat-Audio Attacks? An Evaluation Benchmark for Large Language Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Yang,+W">Wanqi Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Li,+Y">Yanda Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Fang,+M">Meng Fang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Wei,+Y">Yunchao Wei</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Zhou,+T">Tianyi Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Chen,+L">Ling Chen</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item15'>[15]</a> <a href ="/abs/2411.14773" title="Abstract" id="2411.14773"> arXiv:2411.14773 </a> [<a href="/pdf/2411.14773" title="Download PDF" id="pdf-2411.14773" aria-labelledby="pdf-2411.14773">pdf</a>, <a href="https://arxiv.org/html/2411.14773v1" title="View HTML" id="html-2411.14773" aria-labelledby="html-2411.14773" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14773" title="Other formats" id="oth-2411.14773" aria-labelledby="oth-2411.14773">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Mode-conditioned music learning and composition: a spiking neural network inspired by 
neuroscience and psychology </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Liang,+Q">Qian Liang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Zeng,+Y">Yi Zeng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Tang,+M">Menghaoran Tang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 18 pages, 8 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI); Audio and Speech Processing (eess.AS); Neurons and Cognition (q-bio.NC) </div> </div> </dd> <dt> <a name='item16'>[16]</a> <a href ="/abs/2411.14627" title="Abstract" id="2411.14627"> arXiv:2411.14627 </a> [<a href="/pdf/2411.14627" title="Download PDF" id="pdf-2411.14627" aria-labelledby="pdf-2411.14627">pdf</a>, <a href="https://arxiv.org/html/2411.14627v1" title="View HTML" id="html-2411.14627" aria-labelledby="html-2411.14627" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14627" title="Other formats" id="oth-2411.14627" aria-labelledby="oth-2411.14627">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Generative AI for Music and Audio </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Dong,+H">Hao-Wen Dong</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> PhD Dissertation </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG); Multimedia (cs.MM); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item17'>[17]</a> <a href ="/abs/2411.14586" title="Abstract" id="2411.14586"> arXiv:2411.14586 </a> [<a href="/pdf/2411.14586" title="Download PDF" 
id="pdf-2411.14586" aria-labelledby="pdf-2411.14586">pdf</a>, <a href="https://arxiv.org/html/2411.14586v1" title="View HTML" id="html-2411.14586" aria-labelledby="html-2411.14586" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14586" title="Other formats" id="oth-2411.14586" aria-labelledby="oth-2411.14586">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Listening for Expert Identified Linguistic Features: Assessment of Audio Deepfake Discernment among Undergraduate Students </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Bhalli,+N+N">Noshaba N. Bhalli</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Naqvi,+N">Nehal Naqvi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Evered,+C">Chloe Evered</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Mallinson,+C">Christine Mallinson</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Janeja,+V+P">Vandana P. 
Janeja</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Computers and Society (cs.CY); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item18'>[18]</a> <a href ="/abs/2411.14474" title="Abstract" id="2411.14474"> arXiv:2411.14474 </a> [<a href="/pdf/2411.14474" title="Download PDF" id="pdf-2411.14474" aria-labelledby="pdf-2411.14474">pdf</a>, <a href="https://arxiv.org/html/2411.14474v1" title="View HTML" id="html-2411.14474" aria-labelledby="html-2411.14474" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14474" title="Other formats" id="oth-2411.14474" aria-labelledby="oth-2411.14474">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Attention-guided Spectrogram Sequence Modeling with CNNs for Music Genre Classification </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Aditya">Aditya Sridhar</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 6 pages, 7 figures, 17 References </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Computer Vision and Pattern Recognition (cs.CV); Machine Learning (cs.LG); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item19'>[19]</a> <a href ="/abs/2411.14972" title="Abstract" id="2411.14972"> arXiv:2411.14972 </a> (cross-list from eess.AS) [<a href="/pdf/2411.14972" title="Download PDF" id="pdf-2411.14972" aria-labelledby="pdf-2411.14972">pdf</a>, <a href="https://arxiv.org/html/2411.14972v1" title="View HTML" id="html-2411.14972" aria-labelledby="html-2411.14972" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14972" title="Other formats" id="oth-2411.14972" aria-labelledby="oth-2411.14972">other</a>] </dt> <dd> <div class='meta'> <div 
class='list-title mathjax'><span class='descriptor'>Title:</span> Open-Amp: Synthetic Data Framework for Audio Effect Foundation Models </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&amp;query=Wright,+A">Alec Wright</a>, <a href="https://arxiv.org/search/eess?searchtype=author&amp;query=Carson,+A">Alistair Carson</a>, <a href="https://arxiv.org/search/eess?searchtype=author&amp;query=Juvela,+L">Lauri Juvela</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Audio and Speech Processing (eess.AS)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG); Sound (cs.SD) </div> </div> </dd> <dt> <a name='item20'>[20]</a> <a href ="/abs/2411.14493" title="Abstract" id="2411.14493"> arXiv:2411.14493 </a> (cross-list from cs.CL) [<a href="/pdf/2411.14493" title="Download PDF" id="pdf-2411.14493" aria-labelledby="pdf-2411.14493">pdf</a>, <a href="/format/2411.14493" title="Other formats" id="oth-2411.14493" aria-labelledby="oth-2411.14493">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> From Statistical Methods to Pre-Trained Models; A Survey on Automatic Speech Recognition for Resource Scarce Urdu Language </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Sharif,+M">Muhammad Sharif</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Abbas,+Z">Zeeshan Abbas</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Yi,+J">Jiangyan Yi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Liu,+C">Chenglin Liu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Submitted to SN Computer Science </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Sound (cs.SD); Audio and Speech Processing 
(eess.AS) </div> </div> </dd> <dt> <a name='item21'>[21]</a> <a href ="/abs/2411.14489" title="Abstract" id="2411.14489"> arXiv:2411.14489 </a> (cross-list from cs.CL) [<a href="/pdf/2411.14489" title="Download PDF" id="pdf-2411.14489" aria-labelledby="pdf-2411.14489">pdf</a>, <a href="https://arxiv.org/html/2411.14489v1" title="View HTML" id="html-2411.14489" aria-labelledby="html-2411.14489" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14489" title="Other formats" id="oth-2411.14489" aria-labelledby="oth-2411.14489">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> GhostRNN: Reducing State Redundancy in RNN with Cheap Operations </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Zhou,+H">Hang Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Zheng,+X">Xiaoxu Zheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Wang,+Y">Yunhe Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Mi,+M+B">Michael Bi Mi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Xiong,+D">Deyi Xiong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Han,+K">Kai Han</a></div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Proc. 
INTERSPEECH 2023, 226-230 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI); Sound (cs.SD); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item22'>[22]</a> <a href ="/abs/2411.14453" title="Abstract" id="2411.14453"> arXiv:2411.14453 </a> (cross-list from cs.CL) [<a href="/pdf/2411.14453" title="Download PDF" id="pdf-2411.14453" aria-labelledby="pdf-2411.14453">pdf</a>, <a href="https://arxiv.org/html/2411.14453v1" title="View HTML" id="html-2411.14453" aria-labelledby="html-2411.14453" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14453" title="Other formats" id="oth-2411.14453" aria-labelledby="oth-2411.14453">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Direct Speech-to-Speech Neural Machine Translation: A Survey </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Gupta,+M">Mahendra Gupta</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Dutta,+M">Maitreyee Dutta</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Maurya,+C+K">Chandresh Kumar Maurya</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Sound (cs.SD); Audio and Speech Processing (eess.AS) </div> </div> </dd> </dl> <dl id='articles'> <h3>Fri, 22 Nov 2024 (showing 6 of 6 entries )</h3> <dt> <a name='item23'>[23]</a> <a href ="/abs/2411.14207" title="Abstract" id="2411.14207"> arXiv:2411.14207 </a> [<a href="/pdf/2411.14207" title="Download PDF" id="pdf-2411.14207" aria-labelledby="pdf-2411.14207">pdf</a>, <a href="https://arxiv.org/html/2411.14207v1" title="View HTML" id="html-2411.14207" aria-labelledby="html-2411.14207" rel="noopener noreferrer" target="_blank">html</a>, <a 
href="/format/2411.14207" title="Other formats" id="oth-2411.14207" aria-labelledby="oth-2411.14207">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> HARP: A Large-Scale Higher-Order Ambisonic Room Impulse Response Dataset </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Saini,+S">Shivam Saini</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Peissig,+J">Jürgen Peissig</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Submitted to ICASSP 2025 Workshop Dataset and code to be uploaded at: <a href="https://github.com/whojavumusic/HARP" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI); Multimedia (cs.MM); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item24'>[24]</a> <a href ="/abs/2411.13811" title="Abstract" id="2411.13811"> arXiv:2411.13811 </a> [<a href="/pdf/2411.13811" title="Download PDF" id="pdf-2411.13811" aria-labelledby="pdf-2411.13811">pdf</a>, <a href="https://arxiv.org/html/2411.13811v2" title="View HTML" id="html-2411.13811" aria-labelledby="html-2411.13811" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13811" title="Other formats" id="oth-2411.13811" aria-labelledby="oth-2411.13811">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> X-CrossNet: A complex spectral mapping approach to target speaker extraction with cross attention speaker embedding fusion </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Sun,+C">Chang Sun</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Qin,+B">Bo Qin</a></div> <div 
class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Multimedia (cs.MM); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item25'>[25]</a> <a href ="/abs/2411.13766" title="Abstract" id="2411.13766"> arXiv:2411.13766 </a> [<a href="/pdf/2411.13766" title="Download PDF" id="pdf-2411.13766" aria-labelledby="pdf-2411.13766">pdf</a>, <a href="https://arxiv.org/html/2411.13766v2" title="View HTML" id="html-2411.13766" aria-labelledby="html-2411.13766" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13766" title="Other formats" id="oth-2411.13766" aria-labelledby="oth-2411.13766">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Tiny-Align: Bridging Automatic Speech Recognition and Large Language Model on the Edge </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Qin,+R">Ruiyang Qin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Liu,+D">Dancheng Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Xu,+G">Gelei Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Yan,+Z">Zheyu Yan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Xu,+C">Chenhui Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Hu,+Y">Yuting Hu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Hu,+X+S">X. 
Sharon Hu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Xiong,+J">Jinjun Xiong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Shi,+Y">Yiyu Shi</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 7 pages, 8 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item26'>[26]</a> <a href ="/abs/2411.13592" title="Abstract" id="2411.13592"> arXiv:2411.13592 </a> [<a href="/pdf/2411.13592" title="Download PDF" id="pdf-2411.13592" aria-labelledby="pdf-2411.13592">pdf</a>, <a href="https://arxiv.org/html/2411.13592v1" title="View HTML" id="html-2411.13592" aria-labelledby="html-2411.13592" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13592" title="Other formats" id="oth-2411.13592" aria-labelledby="oth-2411.13592">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Novel Speech Analysis and Correction Tool for Arabic-Speaking Children </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Berriche,+L">Lamia Berriche</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Driss,+M">Maha Driss</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Almuntashri,+A+A">Areej Ahmed Almuntashri</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Lghabi,+A+M">Asma Mufreh Lghabi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Almudhi,+H+S">Heba Saleh Almudhi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Almansour,+M+A">Munerah Abdul-Aziz Almansour</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial 
Intelligence (cs.AI) </div> </div> </dd> <dt> <a name='item27'>[27]</a> <a href ="/abs/2411.13674" title="Abstract" id="2411.13674"> arXiv:2411.13674 </a> (cross-list from cs.CV) [<a href="/pdf/2411.13674" title="Download PDF" id="pdf-2411.13674" aria-labelledby="pdf-2411.13674">pdf</a>, <a href="https://arxiv.org/html/2411.13674v1" title="View HTML" id="html-2411.13674" aria-labelledby="html-2411.13674" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13674" title="Other formats" id="oth-2411.13674" aria-labelledby="oth-2411.13674">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> FabuLight-ASD: Unveiling Speech Activity via Body Language </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Carneiro,+H">Hugo Carneiro</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Wermter,+S">Stefan Wermter</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 23 pages, 8 figures, 3 tables, accepted for publication in Neural Computing and Applications </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Machine Learning (cs.LG); Neural and Evolutionary Computing (cs.NE); Sound (cs.SD); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item28'>[28]</a> <a href ="/abs/2411.13577" title="Abstract" id="2411.13577"> arXiv:2411.13577 </a> (cross-list from eess.AS) [<a href="/pdf/2411.13577" title="Download PDF" id="pdf-2411.13577" aria-labelledby="pdf-2411.13577">pdf</a>, <a href="https://arxiv.org/html/2411.13577v2" title="View HTML" id="html-2411.13577" aria-labelledby="html-2411.13577" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13577" title="Other formats" id="oth-2411.13577" aria-labelledby="oth-2411.13577">other</a>] </dt> <dd> <div 
class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> WavChat: A Survey of Spoken Dialogue Models </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&amp;query=Ji,+S">Shengpeng Ji</a>, <a href="https://arxiv.org/search/eess?searchtype=author&amp;query=Chen,+Y">Yifu Chen</a>, <a href="https://arxiv.org/search/eess?searchtype=author&amp;query=Fang,+M">Minghui Fang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&amp;query=Zuo,+J">Jialong Zuo</a>, <a href="https://arxiv.org/search/eess?searchtype=author&amp;query=Lu,+J">Jingyu Lu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&amp;query=Wang,+H">Hanting Wang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&amp;query=Jiang,+Z">Ziyue Jiang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&amp;query=Zhou,+L">Long Zhou</a>, <a href="https://arxiv.org/search/eess?searchtype=author&amp;query=Liu,+S">Shujie Liu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&amp;query=Cheng,+X">Xize Cheng</a>, <a href="https://arxiv.org/search/eess?searchtype=author&amp;query=Yang,+X">Xiaoda Yang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&amp;query=Wang,+Z">Zehan Wang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&amp;query=Yang,+Q">Qian Yang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&amp;query=Li,+J">Jian Li</a>, <a href="https://arxiv.org/search/eess?searchtype=author&amp;query=Jiang,+Y">Yidi Jiang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&amp;query=He,+J">Jingzhen He</a>, <a href="https://arxiv.org/search/eess?searchtype=author&amp;query=Chu,+Y">Yunfei Chu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&amp;query=Xu,+J">Jin Xu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&amp;query=Zhao,+Z">Zhou Zhao</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 60 papes, working 
in progress </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Audio and Speech Processing (eess.AS)</span>; Computation and Language (cs.CL); Machine Learning (cs.LG); Multimedia (cs.MM); Sound (cs.SD) </div> </div> </dd> </dl> <dl id='articles'> <h3>Thu, 21 Nov 2024 (showing 8 of 8 entries )</h3> <dt> <a name='item29'>[29]</a> <a href ="/abs/2411.13424" title="Abstract" id="2411.13424"> arXiv:2411.13424 </a> [<a href="/pdf/2411.13424" title="Download PDF" id="pdf-2411.13424" aria-labelledby="pdf-2411.13424">pdf</a>, <a href="/format/2411.13424" title="Other formats" id="oth-2411.13424" aria-labelledby="oth-2411.13424">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> CAFE A Novel Code switching Dataset for Algerian Dialect French and English </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Lachemat,+H+E">Houssam Eddine-Othman Lachemat</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Abbas,+A">Akli Abbas</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Oukas,+N">Nourredine Oukas</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Kheir,+Y+E">Yassine El Kheir</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Haboussi,+S">Samia Haboussi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Shammur,+A+C">Absar Chowdhury Shammur</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 24 pages, submitted to tallip </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Computation and Language (cs.CL); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item30'>[30]</a> <a href ="/abs/2411.13314" title="Abstract" id="2411.13314"> arXiv:2411.13314 </a> [<a href="/pdf/2411.13314" 
title="Download PDF" id="pdf-2411.13314" aria-labelledby="pdf-2411.13314">pdf</a>, <a href="https://arxiv.org/html/2411.13314v1" title="View HTML" id="html-2411.13314" aria-labelledby="html-2411.13314" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13314" title="Other formats" id="oth-2411.13314" aria-labelledby="oth-2411.13314">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> I2TTS: Image-indicated Immersive Text-to-speech Synthesis with Spatial Perception </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Zhang,+J">Jiawei Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Zhang,+T">Tian-Hao Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Wang,+J">Jun Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Gao,+J">Jiaran Gao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Qian,+X">Xinyuan Qian</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Yin,+X">Xu-Cheng Yin</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 5pages,4figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item31'>[31]</a> <a href ="/abs/2411.13209" title="Abstract" id="2411.13209"> arXiv:2411.13209 </a> [<a href="/pdf/2411.13209" title="Download PDF" id="pdf-2411.13209" aria-labelledby="pdf-2411.13209">pdf</a>, <a href="https://arxiv.org/html/2411.13209v1" title="View HTML" id="html-2411.13209" aria-labelledby="html-2411.13209" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13209" title="Other formats" id="oth-2411.13209" aria-labelledby="oth-2411.13209">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span 
class='descriptor'>Title:</span> Comparative Analysis of Audio Feature Extraction for Real-Time Talking Portrait Synthesis </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Salehi,+P">Pegah Salehi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Sheshkal,+S+A">Sajad Amouei Sheshkal</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Thambawita,+V">Vajira Thambawita</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Gautam,+S">Sushant Gautam</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Sabet,+S+S">Saeed S. Sabet</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Johansen,+D">Dag Johansen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Riegler,+M+A">Michael A. Riegler</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Halvorsen,+P">Pål Halvorsen</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 16 pages, 6 figures, 3 tables. 
submitted to MDPI journal in as Big Data and Cognitive Computing </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI); Human-Computer Interaction (cs.HC); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item32'>[32]</a> <a href ="/abs/2411.13179" title="Abstract" id="2411.13179"> arXiv:2411.13179 </a> [<a href="/pdf/2411.13179" title="Download PDF" id="pdf-2411.13179" aria-labelledby="pdf-2411.13179">pdf</a>, <a href="/format/2411.13179" title="Other formats" id="oth-2411.13179" aria-labelledby="oth-2411.13179">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> SONNET: Enhancing Time Delay Estimation by Leveraging Simulated Audio </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Tegler,+E">Erik Tegler</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Oskarsson,+M">Magnus Oskarsson</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=%C3%85str%C3%B6m,+K">Kalle Åström</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Computer Vision and Pattern Recognition (cs.CV); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item33'>[33]</a> <a href ="/abs/2411.13224" title="Abstract" id="2411.13224"> arXiv:2411.13224 </a> (cross-list from cs.HC) [<a href="/pdf/2411.13224" title="Download PDF" id="pdf-2411.13224" aria-labelledby="pdf-2411.13224">pdf</a>, <a href="/format/2411.13224" title="Other formats" id="oth-2411.13224" aria-labelledby="oth-2411.13224">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Building music with Lego bricks and Raspberry Pi </div> <div class='list-authors'><a 
href="https://arxiv.org/search/cs?searchtype=author&amp;query=Barbancho,+A+M">Ana M. Barbancho</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Tardon,+L+J">Lorenzo J. Tardon</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Barbancho,+I">Isabel Barbancho</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 21 pages </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Multimedia Tools and Applications, 83, 10503-10523, 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Human-Computer Interaction (cs.HC)</span>; Sound (cs.SD); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item34'>[34]</a> <a href ="/abs/2411.13217" title="Abstract" id="2411.13217"> arXiv:2411.13217 </a> (cross-list from eess.SP) [<a href="/pdf/2411.13217" title="Download PDF" id="pdf-2411.13217" aria-labelledby="pdf-2411.13217">pdf</a>, <a href="/format/2411.13217" title="Other formats" id="oth-2411.13217" aria-labelledby="oth-2411.13217">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Energy-based features and bi-LSTM neural network for EEG-based music and voice classification </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&amp;query=Ariza,+I">Isaac Ariza</a>, <a href="https://arxiv.org/search/eess?searchtype=author&amp;query=Barbancho,+A+M">Ana M. Barbancho</a>, <a href="https://arxiv.org/search/eess?searchtype=author&amp;query=Tardon,+L+J">Lorenzo J. 
Tardon</a>, <a href="https://arxiv.org/search/eess?searchtype=author&amp;query=Barbancho,+I">Isabel Barbancho</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 12 pages </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Neural Comput and Applic 36, 791-802, 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Sound (cs.SD); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item35'>[35]</a> <a href ="/abs/2411.13159" title="Abstract" id="2411.13159"> arXiv:2411.13159 </a> (cross-list from cs.CL) [<a href="/pdf/2411.13159" title="Download PDF" id="pdf-2411.13159" aria-labelledby="pdf-2411.13159">pdf</a>, <a href="https://arxiv.org/html/2411.13159v1" title="View HTML" id="html-2411.13159" aria-labelledby="html-2411.13159" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13159" title="Other formats" id="oth-2411.13159" aria-labelledby="oth-2411.13159">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Hard-Synth: Synthesizing Diverse Hard Samples for ASR using Zero-Shot TTS and LLM </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Yu,+J">Jiawei Yu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Li,+Y">Yuang Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Qiao,+X">Xiaosong Qiao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Zhao,+H">Huan Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Zhao,+X">Xiaofeng Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Tang,+W">Wei Tang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Zhang,+M">Min Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Yang,+H">Hao 
Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Su,+J">Jinsong Su</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Sound (cs.SD); Audio and Speech Processing (eess.AS) </div> </div> </dd> <dt> <a name='item36'>[36]</a> <a href ="/abs/2411.13089" title="Abstract" id="2411.13089"> arXiv:2411.13089 </a> (cross-list from cs.CV) [<a href="/pdf/2411.13089" title="Download PDF" id="pdf-2411.13089" aria-labelledby="pdf-2411.13089">pdf</a>, <a href="/format/2411.13089" title="Other formats" id="oth-2411.13089" aria-labelledby="oth-2411.13089">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> ESARM: 3D Emotional Speech-to-Animation via Reward Model from Automatically-Ranked Demonstrations </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Zhang,+X">Xulong Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Qu,+X">Xiaoyang Qu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Shi,+H">Haoxiang Shi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Xiao,+C">Chunguang Xiao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Wang,+J">Jianzong Wang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> This paper has issues. 
We have already contacted HPCC for withdrawal and now need to withdraw it from arXiv as well </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Sound (cs.SD); Audio and Speech Processing (eess.AS) </div> </div> </dd> </dl> <div class='paging'>Total of 36 entries </div> <div class='morefewer'>Showing up to 50 entries per page: <a href=/list/cs.SD/recent?skip=0&amp;show=25 rel="nofollow"> fewer</a> | <span style="color: #454545">more</span> | <span style="color: #454545">all</span> </div> </div> </div> </div> </main> <footer style="clear: both;"> <div class="columns is-desktop" role="navigation" aria-label="Secondary" style="margin: -0.75em -0.75em 0.75em -0.75em"> <!-- Macro-Column 1 --> <div class="column" style="padding: 0;"> <div class="columns"> <div class="column"> <ul style="list-style: none; line-height: 2;"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul style="list-style: none; line-height: 2;"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to 
arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- End Macro-Column 1 --> <!-- Macro-Column 2 --> <div class="column" style="padding: 0;"> <div class="columns"> <div class="column"> <ul style="list-style: none; line-height: 2;"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul style="list-style: none; line-height: 2;"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 
0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> <!-- End Macro-Column 2 --> </div> </footer> </div> <script src="/static/base/1.0.1/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10