CINXE.COM
Search | arXiv e-print repository
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <!-- new favicon config and versions by realfavicongenerator.net -->
  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png">
  <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png">
  <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png">
  <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest">
  <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b">
  <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico">
  <meta name="msapplication-TileColor" content="#b31b1b">
  <meta name="msapplication-config" content="images/icons/browserconfig.xml">
  <meta name="theme-color" content="#b31b1b">
  <!-- end favicon config -->
  <title>Search | arXiv e-print repository</title>
  <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script>
  <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css">
  <!-- MathJax 2 configuration: ignoreClass '.*' with processClass 'mathjax.*' limits
       typesetting to elements carrying a class that starts with "mathjax". -->
  <script type="text/x-mathjax-config">
    MathJax.Hub.Config({
      messageStyle: "none",
      extensions: ["tex2jax.js"],
      jax: ["input/TeX", "output/HTML-CSS"],
      tex2jax: {
        inlineMath: [ ['$','$'], ["\\(","\\)"] ],
        displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
        processEscapes: true,
        ignoreClass: '.*',
        processClass: 'mathjax.*'
      },
      TeX: {
        extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"],
        noErrors: {
          inlineDelimiters: ["$","$"],
          multiLine: false,
          style: { "font-size": "normal", "border": "" }
        }
      },
      "HTML-CSS": { availableFonts: ["TeX"] }
    });
  </script>
  <!-- Loaded over explicit HTTPS instead of a protocol-relative URL. -->
  <script src="https://static.arxiv.org/MathJax-2.7.3/MathJax.js"></script>
  <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script>
  <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css">
  <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css">
  <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script>
  <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script>
  <style>
    /* NOTE(review): "radio" is not an HTML element name, so this selector presumably
       matches nothing; confirm what it was meant to hide. */
    radio#cf-customfield_11400 { display: none; }
  </style>
</head>
<body>
  <header>
    <a href="#main-container" class="is-sr-only">Skip to main content</a>
    <!-- contains Cornell logo and sponsor statement -->
    <!-- No role="banner" here: the enclosing page-level <header> already is the banner landmark. -->
    <div class="attribution level is-marginless">
      <div class="level-left">
        <!-- The link takes its accessible name from the image alt text. -->
        <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200"></a>
      </div>
      <div class="level-right is-marginless">
        <p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br>
          the Simons Foundation,
          <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors.
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div>
    </div>
    <!-- contains arXiv identity and search bar -->
    <div class="identity level is-marginless">
      <div class="level-left">
        <div class="level-item">
          <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo">
            <!-- alt alone names the image; an extra aria-label would override it. -->
            <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" alt="arxiv logo" width="85" style="width:85px;">
          </a>
        </div>
      </div>
      <div class="search-block level-right">
        <!-- Compact header search; submits to the site-wide search endpoint. -->
        <form class="level-item mini-search" method="GET" action="https://arxiv.org/search">
          <div class="field has-addons">
            <div class="control">
              <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms">
              <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p>
            </div>
            <div class="control">
              <div class="select is-small">
                <select name="searchtype" aria-label="Field to search">
                  <option value="all" selected>All fields</option>
                  <option value="title">Title</option>
                  <option value="author">Author</option>
                  <option value="abstract">Abstract</option>
                  <option value="comments">Comments</option>
                  <option value="journal_ref">Journal reference</option>
                  <option value="acm_class">ACM classification</option>
                  <option value="msc_class">MSC classification</option>
                  <option value="report_num">Report number</option>
                  <option value="paper_id">arXiv identifier</option>
                  <option value="doi">DOI</option>
                  <option value="orcid">ORCID</option>
                  <option value="author_id">arXiv author ID</option>
                  <option value="help">Help pages</option>
                  <option value="full_text">Full text</option>
                </select>
              </div>
            </div>
            <input type="hidden" name="source" value="header">
            <button class="button is-small is-cul-darker" type="submit">Search</button>
          </div>
        </form>
      </div>
    </div> <!-- closes identity -->
    <div class="container">
      <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu">
        <a href="https://arxiv.org/login">Login</a>
      </div>
    </div>
  </header>
  <main class="container" id="main-container">
    <div class="level is-marginless">
      <div class="level-left">
        <h1 class="title is-clearfix"> Showing 1–50 of 503 results for author: <span class="mathjax">Chen, W</span> </h1>
      </div>
      <div class="level-right is-hidden-mobile">
        <!-- feedback for mobile is moved to footer -->
        <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span>
      </div>
    </div>
    <div class="content">
      <!-- Main search form, scoped to the eess archive. role="search" (not the
           non-existent "aria-role" attribute) exposes it as a search landmark. -->
      <form method="GET" action="/search/eess" role="search">
        Searching in archive <strong>eess</strong>. <a href="/search/?searchtype=author&amp;query=Chen%2C+W">Search in all archives.</a>
        <div class="field has-addons-tablet">
          <div class="control is-expanded">
            <label for="query" class="hidden-label">Search term or terms</label>
            <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Chen, W">
          </div>
          <div class="select control is-medium">
            <label class="is-hidden" for="searchtype">Field</label>
            <select class="is-medium" id="searchtype" name="searchtype">
              <option value="all">All fields</option>
              <option value="title">Title</option>
              <option selected value="author">Author(s)</option>
              <option value="abstract">Abstract</option>
              <option value="comments">Comments</option>
              <option value="journal_ref">Journal reference</option>
              <option value="acm_class">ACM classification</option>
              <option value="msc_class">MSC classification</option>
              <option value="report_num">Report number</option>
              <option value="paper_id">arXiv identifier</option>
              <option value="doi">DOI</option>
              <option value="orcid">ORCID</option>
              <option value="license">License (URI)</option>
              <option value="author_id">arXiv author ID</option>
              <option value="help">Help pages</option>
              <option value="full_text">Full text</option>
            </select>
          </div>
          <div class="control">
            <button class="button is-link is-medium" type="submit">Search</button>
          </div>
        </div>
        <div class="field">
          <div class="control is-size-7">
            <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label>
            <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label>
          </div>
        </div>
        <div class="is-clearfix" style="height: 2.5em">
          <div class="is-pulled-right">
            <a href="/search/advanced?terms-0-term=Chen%2C+W&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a>
          </div>
        </div>
        <input type="hidden" name="order" value="-announced_date_first">
        <input type="hidden" name="size" value="50">
      </form>
      <div class="level breathe-horizontal">
        <div class="level-left">
          <!-- Page-size / sort-order form.
               NOTE(review): this form posts to /search/ while the main form above posts to
               /search/eess; confirm that dropping the archive scope here is intended. -->
          <form method="GET" action="/search/">
            <!-- Non-displayed controls that carry the current field, query and abstracts
                 setting along when this form is submitted.
                 NOTE(review): the ids searchtype, query, abstracts-0 and abstracts-1 repeat
                 ids used in the main form above; ids should be unique per document. Left
                 unchanged because scripts outside this file may depend on them. -->
            <div style="display: none;">
              <select id="searchtype" name="searchtype">
                <option value="all">All fields</option>
                <option value="title">Title</option>
                <option selected value="author">Author(s)</option>
                <option value="abstract">Abstract</option>
                <option value="comments">Comments</option>
                <option value="journal_ref">Journal reference</option>
                <option value="acm_class">ACM classification</option>
                <option value="msc_class">MSC classification</option>
                <option value="report_num">Report number</option>
                <option value="paper_id">arXiv identifier</option>
                <option value="doi">DOI</option>
                <option value="orcid">ORCID</option>
                <option value="license">License (URI)</option>
                <option value="author_id">arXiv author ID</option>
                <option value="help">Help pages</option>
                <option value="full_text">Full text</option>
              </select>
              <input id="query" name="query" type="text" value="Chen, W">
              <ul id="abstracts">
                <li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li>
                <li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li>
              </ul>
            </div>
            <div class="box field is-grouped is-grouped-multiline level-item">
              <div class="control">
                <span class="select is-small">
                  <select id="size" name="size">
                    <option value="25">25</option>
                    <option selected value="50">50</option>
                    <option value="100">100</option>
                    <option value="200">200</option>
                  </select>
                </span>
                <label for="size">results per page</label>.
</div>
              <div class="control">
                <label for="order">Sort results by</label>
                <span class="select is-small">
                  <select id="order" name="order">
                    <option selected value="-announced_date_first">Announcement date (newest first)</option>
                    <option value="announced_date_first">Announcement date (oldest first)</option>
                    <option value="-submitted_date">Submission date (newest first)</option>
                    <option value="submitted_date">Submission date (oldest first)</option>
                    <option value="">Relevance</option>
                  </select>
                </span>
              </div>
              <div class="control">
                <button class="button is-small is-link" type="submit">Go</button>
              </div>
            </div>
          </form>
        </div>
      </div>
      <!-- Pagination for the result list. Only the link for the page being shown
           (page 1, which also carries is-current) has aria-current="page"; every
           other link is labelled as a jump target. -->
      <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination">
        <a href="" class="pagination-previous is-invisible">Previous </a>
        <a href="/search/?searchtype=author&amp;query=Chen%2C+W&amp;start=50" class="pagination-next">Next </a>
        <ul class="pagination-list">
          <li>
            <a href="/search/?searchtype=author&amp;query=Chen%2C+W&amp;start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a>
          </li>
          <li>
            <a href="/search/?searchtype=author&amp;query=Chen%2C+W&amp;start=50" class="pagination-link" aria-label="Goto page 2">2 </a>
          </li>
          <li>
            <a href="/search/?searchtype=author&amp;query=Chen%2C+W&amp;start=100" class="pagination-link" aria-label="Goto page 3">3 </a>
          </li>
          <li>
            <a href="/search/?searchtype=author&amp;query=Chen%2C+W&amp;start=150" class="pagination-link" aria-label="Goto page 4">4 </a>
          </li>
          <li>
            <a href="/search/?searchtype=author&amp;query=Chen%2C+W&amp;start=200" class="pagination-link" aria-label="Goto page 5">5 </a>
          </li>
          <li><span class="pagination-ellipsis">…</span></li>
        </ul>
      </nav>
      <!-- Search results; each list item is one paper. -->
      <ol class="breathe-horizontal" start="1">
        <li class="arxiv-result">
          <div class="is-marginless">
            <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.18153">arXiv:2411.18153</a>
              <span> [<a href="https://arxiv.org/pdf/2411.18153">pdf</a>, <a href="https://arxiv.org/ps/2411.18153">ps</a>, <a href="https://arxiv.org/format/2411.18153">other</a>] </span>
            </p>
            <div class="tags is-inline-block">
              <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span>
            </div>
          </div>
          <p class="title is-5 mathjax"> Learning Rate-Compatible Linear Block Codes: An Auto-Encoder Based Approach </p>
          <p class="authors">
            <span class="search-hit">Authors:</span>
            <a href="/search/eess?searchtype=author&amp;query=Cheng%2C+Y">Yukun Cheng</a>,
            <a href="/search/eess?searchtype=author&amp;query=Chen%2C+W">Wei Chen</a>,
            <a href="/search/eess?searchtype=author&amp;query=Hou%2C+T">Tianwei Hou</a>,
            <a href="/search/eess?searchtype=author&amp;query=Li%2C+G+Y">Geoffrey Ye Li</a>,
            <a href="/search/eess?searchtype=author&amp;query=Ai%2C+B">Bo Ai</a>
          </p>
          <p class="abstract mathjax">
            <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
            <span class="abstract-short has-text-grey-dark mathjax" id="2411.18153v1-abstract-short" style="display: inline;"> Artificial intelligence (AI) provides an alternative way to design channel coding with affordable complexity. However, most existing studies can only learn codes for a given size and rate, typically defined by a fixed network architecture and a set of parameters. The support of multiple code rates is essential for conserving bandwidth under varying channel conditions while it is costly to store mu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.18153v1-abstract-full').style.display = 'inline'; document.getElementById('2411.18153v1-abstract-short').style.display = 'none';">▽ More</a> </span>
            <span class="abstract-full has-text-grey-dark mathjax" id="2411.18153v1-abstract-full" style="display: none;"> Artificial intelligence (AI) provides an alternative way to design channel coding with affordable complexity.
However, most existing studies can only learn codes for a given size and rate, typically defined by a fixed network architecture and a set of parameters. The support of multiple code rates is essential for conserving bandwidth under varying channel conditions while it is costly to store multiple AI models or parameter sets. In this article, we propose an auto-encoder (AE) based rate-compatible linear block codes (RC-LBCs). The coding process associated with AI or non-AI decoders and multiple puncturing patterns is optimized in a data-driven manner. The superior performance of the proposed AI-based RC-LBC is demonstrated through our numerical experiments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.18153v1-abstract-full').style.display = 'none'; document.getElementById('2411.18153v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14088">arXiv:2411.14088</a> <span> [<a href="https://arxiv.org/pdf/2411.14088">pdf</a>, <a href="https://arxiv.org/format/2411.14088">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Channel Customization for Low-Complexity CSI Acquisition in Multi-RIS-Assisted MIMO Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+W">Weicong Chen</a>, <a href="/search/eess?searchtype=author&query=Han%2C+Y">Yu Han</a>, <a href="/search/eess?searchtype=author&query=Wen%2C+C">Chao-Kai Wen</a>, <a href="/search/eess?searchtype=author&query=Li%2C+X">Xiao Li</a>, <a href="/search/eess?searchtype=author&query=Jin%2C+S">Shi Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.14088v1-abstract-short" style="display: inline;"> The deployment of multiple reconfigurable intelligent surfaces (RISs) enhances the propagation environment by improving channel quality, but it also complicates channel estimation. 
Following the conventional wireless communication system design, which involves full channel state information (CSI) acquisition followed by RIS configuration, can reduce transmission efficiency due to substantial pilot… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14088v1-abstract-full').style.display = 'inline'; document.getElementById('2411.14088v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.14088v1-abstract-full" style="display: none;"> The deployment of multiple reconfigurable intelligent surfaces (RISs) enhances the propagation environment by improving channel quality, but it also complicates channel estimation. Following the conventional wireless communication system design, which involves full channel state information (CSI) acquisition followed by RIS configuration, can reduce transmission efficiency due to substantial pilot overhead and computational complexity. This study introduces an innovative approach that integrates CSI acquisition and RIS configuration, leveraging the channel-altering capabilities of the RIS to reduce both the overhead and complexity of CSI acquisition. The focus is on multi-RIS-assisted systems, featuring both direct and reflected propagation paths. By applying a fast-varying reflection sequence during RIS configuration for channel training, the complex problem of channel estimation is decomposed into simpler, independent tasks. These fast-varying reflections effectively isolate transmit signals from different paths, streamlining the CSI acquisition process for both uplink and downlink communications with reduced complexity. In uplink scenarios, a positioning-based algorithm derives partial CSI, informing the adjustment of RIS parameters to create a sparse reflection channel, enabling precise reconstruction of the uplink channel. 
Downlink communication benefits from this strategically tailored reflection channel, allowing effective CSI acquisition with fewer pilot signals. Simulation results highlight the proposed methodology's ability to accurately reconstruct the reflection channel with minimal impact on the normalized mean square error while simultaneously enhancing spectral efficiency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14088v1-abstract-full').style.display = 'none'; document.getElementById('2411.14088v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by IEEE JSAC special issue on Next Generation Advanced Transceiver Technologies</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14052">arXiv:2411.14052</a> <span> [<a href="https://arxiv.org/pdf/2411.14052">pdf</a>, <a href="https://arxiv.org/ps/2411.14052">ps</a>, <a href="https://arxiv.org/format/2411.14052">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Dynamic Trajectory and Power Control in Ultra-Dense UAV Networks: A Mean-Field Reinforcement Learning Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Song%2C+F">Fei Song</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Z">Zhe Wang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+J">Jun Li</a>, <a 
href="/search/eess?searchtype=author&query=Shi%2C+L">Long Shi</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wen Chen</a>, <a href="/search/eess?searchtype=author&query=Jin%2C+S">Shi Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.14052v1-abstract-short" style="display: inline;"> In ultra-dense unmanned aerial vehicle (UAV) networks, it is challenging to coordinate the resource allocation and interference management among large-scale UAVs, for providing flexible and efficient service coverage to the ground users (GUs). In this paper, we propose a learning-based resource allocation scheme in an ultra-dense UAV communication network, where the GUs' service demands are time-v… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14052v1-abstract-full').style.display = 'inline'; document.getElementById('2411.14052v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.14052v1-abstract-full" style="display: none;"> In ultra-dense unmanned aerial vehicle (UAV) networks, it is challenging to coordinate the resource allocation and interference management among large-scale UAVs, for providing flexible and efficient service coverage to the ground users (GUs). In this paper, we propose a learning-based resource allocation scheme in an ultra-dense UAV communication network, where the GUs' service demands are time-varying with unknown distributions. We formulate the non-cooperative game among multiple co-channel UAVs as a stochastic game, where each UAV jointly optimizes its trajectory, user association, and downlink power control to maximize the expectation of its locally cumulative energy efficiency under the interference and energy constraints. 
To cope with the scalability issue in a large-scale network, we further formulate the problem as a mean-field game (MFG), which simplifies the interactions among the UAVs into a two-player game between a representative UAV and a mean-field. We prove the existence and uniqueness of the equilibrium for the MFG, and propose a model-free mean-field reinforcement learning algorithm named maximum entropy mean-field deep Q network (ME-MFDQN) to solve the mean-field equilibrium in both fully and partially observable scenarios. The simulation results reveal that the proposed algorithm improves the energy efficiency compared with the benchmark algorithms. Moreover, the performance can be further enhanced if the GUs' service demands exhibit higher temporal correlation or if the UAVs have wider observation capabilities over their nearby GUs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14052v1-abstract-full').style.display = 'none'; document.getElementById('2411.14052v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13785">arXiv:2411.13785</a> <span> [<a href="https://arxiv.org/pdf/2411.13785">pdf</a>, <a href="https://arxiv.org/ps/2411.13785">ps</a>, <a href="https://arxiv.org/format/2411.13785">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Throughput Maximization for Movable Antenna Systems with Movement Delay Consideration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+H">Honghao Wang</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Q">Qingqing Wu</a>, <a href="/search/eess?searchtype=author&query=Gao%2C+Y">Ying Gao</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wen Chen</a>, <a href="/search/eess?searchtype=author&query=Mei%2C+W">Weidong Mei</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+G">Guojie Hu</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+L">Lexi Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13785v1-abstract-short" style="display: inline;"> In this paper, we model the minimum achievable throughput within a transmission block of restricted duration and aim to maximize it in movable antenna (MA)-enabled multiuser downlink communications. 
Particularly, we account for the antenna moving delay caused by mechanical movement, which has not been fully considered in previous studies, and reveal the trade-off between the delay and signal-to-in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13785v1-abstract-full').style.display = 'inline'; document.getElementById('2411.13785v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13785v1-abstract-full" style="display: none;"> In this paper, we model the minimum achievable throughput within a transmission block of restricted duration and aim to maximize it in movable antenna (MA)-enabled multiuser downlink communications. Particularly, we account for the antenna moving delay caused by mechanical movement, which has not been fully considered in previous studies, and reveal the trade-off between the delay and signal-to-interference-plus-noise ratio at users. To this end, we first consider a single-user setup to analyze the necessity of antenna movement. By quantizing the virtual angles of arrival, we derive the requisite region size for antenna moving, design the initial MA position, and elucidate the relationship between quantization resolution and moving region size. Furthermore, an efficient algorithm is developed to optimize MA position via successive convex approximation, which is subsequently extended to the general multiuser setup. Numerical results demonstrate that the proposed algorithms outperform fixed-position antenna schemes and existing ones without consideration of movement delay. Additionally, our algorithms exhibit excellent adaptability and stability across various transmission block durations and moving region sizes, and are robust to different antenna moving speeds. This allows the hardware cost of MA-aided systems to be reduced by employing low rotational speed motors. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13785v1-abstract-full').style.display = 'none'; document.getElementById('2411.13785v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11030">arXiv:2411.11030</a> <span> [<a href="https://arxiv.org/pdf/2411.11030">pdf</a>, <a href="https://arxiv.org/format/2411.11030">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> IREE Oriented Active RIS-Assisted Green communication System with Outdated CSI </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Cao%2C+K">Kai Cao</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+T">Tao Yu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+J">Jihong Li</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+X">Xiaojing Chen</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+Y">Yanzan Sun</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Q">Qingqing Wu</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wen Chen</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+S">Shunqing Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11030v1-abstract-short" style="display: inline;"> The rapid evolution of communication technologies has spurred a growing demand for energy-efficient network 
architectures and performance metrics. Active Reconfigurable Intelligent Surfaces (RIS) are emerging as a key component in green network architectures. Compared to passive RIS, active RIS are equipped with amplifiers on each reflecting element, allowing them to simultaneously reflect and amp… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11030v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11030v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11030v1-abstract-full" style="display: none;"> The rapid evolution of communication technologies has spurred a growing demand for energy-efficient network architectures and performance metrics. Active Reconfigurable Intelligent Surfaces (RIS) are emerging as a key component in green network architectures. Compared to passive RIS, active RIS are equipped with amplifiers on each reflecting element, allowing them to simultaneously reflect and amplify signals, thereby overcoming the double multiplicative fading in the phase response, and improving both system coverage and performance. Additionally, the Integrated Relative Energy Efficiency (IREE) metric, as introduced in [1], addresses the dynamic variations in traffic and capacity over time and space, enabling more energy-efficient wireless systems. Building on these advancements, this paper investigates the problem of maximizing IREE in active RIS-assisted green communication systems. However, acquiring perfect Channel State Information (CSI) in practical systems poses significant challenges and costs. To address this, we derive the average achievable rate based on outdated CSI and formulated the corresponding IREE maximization problem, which is solved by jointly optimizing beamforming at both the base station and RIS. 
Given the non-convex nature of the problem, we propose an Alternating Optimization Successive Approximation (AOSO) algorithm. By applying quadratic transform and relaxation techniques, we simplify the original problem and alternately optimize the beamforming matrices at the base station and RIS. Furthermore, to handle the discrete constraints of the RIS reflection coefficients, we develop a successive approximation method. Experimental results validate our theoretical analysis of the algorithm's convergence , demonstrating the effectiveness of the proposed algorithm and highlighting the superiority of IREE in enhancing the performance of green communication networks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11030v1-abstract-full').style.display = 'none'; document.getElementById('2411.11030v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09426">arXiv:2411.09426</a> <span> [<a href="https://arxiv.org/pdf/2411.09426">pdf</a>, <a href="https://arxiv.org/ps/2411.09426">ps</a>, <a href="https://arxiv.org/format/2411.09426">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Movable Antenna Enhanced Networked Full-Duplex Integrated Sensing and Communication System </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Guo%2C+Y">Yuan Guo</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wen Chen</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Q">Qingqing Wu</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Y">Yang Liu</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Q">Qiong Wu</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+K">Kunlun Wang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+J">Jun Li</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+L">Lexi Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09426v2-abstract-short" style="display: inline;"> Integrated sensing and communication (ISAC) is envisioned as a key technology for future sixth-generation (6G) networks. Classical ISAC system considering monostatic and/or bistatic settings will inevitably degrade both communication and sensing performance due to the limited service coverage and easily blocked transmission paths. 
Besides, existing ISAC studies usually focus on downlink (DL) or up… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09426v2-abstract-full').style.display = 'inline'; document.getElementById('2411.09426v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09426v2-abstract-full" style="display: none;"> Integrated sensing and communication (ISAC) is envisioned as a key technology for future sixth-generation (6G) networks. Classical ISAC system considering monostatic and/or bistatic settings will inevitably degrade both communication and sensing performance due to the limited service coverage and easily blocked transmission paths. Besides, existing ISAC studies usually focus on downlink (DL) or uplink (UL) communication demands and are unable to achieve the systematic DL and UL communication tasks. These challenges can be overcome by networked FD ISAC framework. Moreover, ISAC generally considers the trade-off between communication and sensing, unavoidably leading to a loss in communication performance. This shortcoming can be solved by the emerging movable antenna (MA) technology. In this paper, we utilize the MA to promote communication capability with guaranteed sensing performance via jointly designing beamforming, power allocation, receiving filters and MA configuration towards maximizing sum rate. The optimization problem is highly difficult due to the unique channel model deriving from the MA. To resolve this challenge, via leveraging the cutting-edge majorization-minimization (MM) method, we develop an efficient solution that optimizes all variables via convex optimization techniques. Extensive simulation results verify the effectiveness of our proposed algorithms and demonstrate the substantial performance promotion by deploying MA in the networked FD ISAC system. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09426v2-abstract-full').style.display = 'none'; document.getElementById('2411.09426v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08264">arXiv:2411.08264</a> <span> [<a href="https://arxiv.org/pdf/2411.08264">pdf</a>, <a href="https://arxiv.org/format/2411.08264">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Sensing-Assisted Beam Tracking with Real-Time Beamwidth Adaptation for THz Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wuhan Chen</a>, <a href="/search/eess?searchtype=author&query=Fan%2C+Y">Yuheng Fan</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+C">Chuang Yang</a>, <a href="/search/eess?searchtype=author&query=Peng%2C+M">Mugen Peng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08264v1-abstract-short" style="display: inline;"> Terahertz (THz) communications, with their substantial bandwidth, are essential for meeting the ultra-high data rate demands of emerging high-mobility scenarios such as vehicular-to-everything (V2X) networks. 
In these contexts, beamwidth adaptation has been explored to address the problem that high-mobility targets frequently move out of the narrow THz beam range. However, existing approaches cann… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08264v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08264v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08264v1-abstract-full" style="display: none;"> Terahertz (THz) communications, with their substantial bandwidth, are essential for meeting the ultra-high data rate demands of emerging high-mobility scenarios such as vehicular-to-everything (V2X) networks. In these contexts, beamwidth adaptation has been explored to address the problem that high-mobility targets frequently move out of the narrow THz beam range. However, existing approaches cannot effectively track targets due to a lack of real-time motion awareness. Consequently, we propose a sensing-assisted beam tracking scheme with real-time beamwidth adaptation. Specifically, the base station (BS) periodically collects prior sensing information to predict the target's motion path by applying a particular motion model. Then, we build a pre-calculated codebook by optimising precoders to align the beamwidth with various predicted target paths, thereby maximising the average achievable data rates within each sensing period. Finally, the BS selects the optimal precoder from the codebook to maintain stable and continuous connectivity. Simulation results show that the proposed scheme significantly improves the rate performance and reduces outage probability compared to existing approaches under various target mobility. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08264v1-abstract-full').style.display = 'none'; document.getElementById('2411.08264v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07728">arXiv:2411.07728</a> <span> [<a href="https://arxiv.org/pdf/2411.07728">pdf</a>, <a href="https://arxiv.org/format/2411.07728">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> No-Reference Point Cloud Quality Assessment via Graph Convolutional Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wu Chen</a>, <a href="/search/eess?searchtype=author&query=Jiang%2C+Q">Qiuping Jiang</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+W">Wei Zhou</a>, <a href="/search/eess?searchtype=author&query=Shao%2C+F">Feng Shao</a>, <a href="/search/eess?searchtype=author&query=Zhai%2C+G">Guangtao Zhai</a>, <a href="/search/eess?searchtype=author&query=Lin%2C+W">Weisi Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07728v1-abstract-short" style="display: inline;"> 
Three-dimensional (3D) point cloud, as an emerging visual media format, is increasingly favored by consumers as it can provide more realistic visual information than two-dimensional (2D) data. Similar to 2D plane images and videos, point clouds inevitably suffer from quality degradation and information loss through multimedia communication systems. Therefore, automatic point cloud quality assessme… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07728v1-abstract-full').style.display = 'inline'; document.getElementById('2411.07728v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07728v1-abstract-full" style="display: none;"> Three-dimensional (3D) point cloud, as an emerging visual media format, is increasingly favored by consumers as it can provide more realistic visual information than two-dimensional (2D) data. Similar to 2D plane images and videos, point clouds inevitably suffer from quality degradation and information loss through multimedia communication systems. Therefore, automatic point cloud quality assessment (PCQA) is of critical importance. In this work, we propose a novel no-reference PCQA method by using a graph convolutional network (GCN) to characterize the mutual dependencies of multi-view 2D projected image contents. The proposed GCN-based PCQA (GC-PCQA) method contains three modules, i.e., multi-view projection, graph construction, and GCN-based quality prediction. First, multi-view projection is performed on the test point cloud to obtain a set of horizontally and vertically projected images. Then, a perception-consistent graph is constructed based on the spatial relations among different projected images. 
Finally, reasoning on the constructed graph is performed by GCN to characterize the mutual dependencies and interactions between different projected images, and aggregate feature information of multi-view projected images for final quality prediction. Experimental results on two publicly available benchmark databases show that our proposed GC-PCQA can achieve superior performance than state-of-the-art quality assessment metrics. The code will be available at: https://github.com/chenwuwq/GC-PCQA. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07728v1-abstract-full').style.display = 'none'; document.getElementById('2411.07728v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by IEEE Transactions on Multimedia</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07111">arXiv:2411.07111</a> <span> [<a href="https://arxiv.org/pdf/2411.07111">pdf</a>, <a href="https://arxiv.org/format/2411.07111">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Building a Taiwanese Mandarin Spoken Language Model: A First Attempt </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/eess?searchtype=author&query=Yang%2C+C">Chih-Kai Yang</a>, <a href="/search/eess?searchtype=author&query=Fu%2C+Y">Yu-Kuan Fu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+C">Chen-An Li</a>, <a href="/search/eess?searchtype=author&query=Lin%2C+Y">Yi-Cheng Lin</a>, <a href="/search/eess?searchtype=author&query=Lin%2C+Y">Yu-Xiang Lin</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wei-Chih Chen</a>, <a href="/search/eess?searchtype=author&query=Chung%2C+H+L">Ho Lam Chung</a>, <a href="/search/eess?searchtype=author&query=Kuan%2C+C">Chun-Yi Kuan</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+W">Wei-Ping Huang</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+K">Ke-Han Lu</a>, <a href="/search/eess?searchtype=author&query=Lin%2C+T">Tzu-Quan Lin</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+H">Hsiu-Hsuan Wang</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+E">En-Pei Hu</a>, <a href="/search/eess?searchtype=author&query=Hsu%2C+C">Chan-Jan Hsu</a>, <a href="/search/eess?searchtype=author&query=Tseng%2C+L">Liang-Hsuan Tseng</a>, <a href="/search/eess?searchtype=author&query=Chiu%2C+I">I-Hsiang Chiu</a>, <a href="/search/eess?searchtype=author&query=Sanga%2C+U">Ulin Sanga</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+X">Xuanjun Chen</a>, <a href="/search/eess?searchtype=author&query=Hsu%2C+P">Po-chun Hsu</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+S">Shu-wen Yang</a>, <a href="/search/eess?searchtype=author&query=Lee%2C+H">Hung-yi Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07111v1-abstract-short" style="display: inline;"> This technical report presents our initial attempt to build a spoken large language model (LLM) for Taiwanese Mandarin, specifically tailored to enable real-time, speech-to-speech interaction in multi-turn 
conversations. Our end-to-end model incorporates a decoder-only transformer architecture and aims to achieve seamless interaction while preserving the conversational flow, including full-duplex… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07111v1-abstract-full').style.display = 'inline'; document.getElementById('2411.07111v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07111v1-abstract-full" style="display: none;"> This technical report presents our initial attempt to build a spoken large language model (LLM) for Taiwanese Mandarin, specifically tailored to enable real-time, speech-to-speech interaction in multi-turn conversations. Our end-to-end model incorporates a decoder-only transformer architecture and aims to achieve seamless interaction while preserving the conversational flow, including full-duplex capabilities allowing simultaneous speaking and listening. The paper also details the training process, including data preparation with synthesized dialogues and adjustments for real-time interaction. We also developed a platform to evaluate conversational fluency and response coherence in multi-turn dialogues. We hope the release of the report can contribute to the future development of spoken LLMs in Taiwanese Mandarin. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07111v1-abstract-full').style.display = 'none'; document.getElementById('2411.07111v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Work in progress</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06552">arXiv:2411.06552</a> <span> [<a href="https://arxiv.org/pdf/2411.06552">pdf</a>, <a href="https://arxiv.org/format/2411.06552">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> CASC: Condition-Aware Semantic Communication with Latent Diffusion Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+W">Weixuan Chen</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+Q">Qianqian Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06552v1-abstract-short" style="display: inline;"> Diffusion-based semantic communication methods have shown significant advantages in image transmission by harnessing the generative power of diffusion models. However, they still face challenges, including generation randomness that leads to distorted reconstructions and high computational costs. 
To address these issues, we propose CASC, a condition-aware semantic communication framework that inco… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06552v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06552v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06552v1-abstract-full" style="display: none;"> Diffusion-based semantic communication methods have shown significant advantages in image transmission by harnessing the generative power of diffusion models. However, they still face challenges, including generation randomness that leads to distorted reconstructions and high computational costs. To address these issues, we propose CASC, a condition-aware semantic communication framework that incorporates a latent diffusion model (LDM)-based denoiser. The LDM denoiser at the receiver utilizes the received noisy latent codes as the conditioning signal to reconstruct the latent codes, enabling the decoder to accurately recover the source image. By operating in the latent space, the LDM reduces computational complexity compared to traditional diffusion models (DMs). Additionally, we introduce a condition-aware neural network (CAN) that dynamically adjusts the weights in the hidden layers of the LDM based on the conditioning signal. This enables finer control over the generation process, significantly improving the perceptual quality of the reconstructed images. Experimental results show that CASC significantly outperforms DeepJSCC in both perceptual quality and visual effect. Moreover, CASC reduces inference time by 51.7% compared to existing DM-based semantic communication systems, while maintaining comparable perceptual performance. The ablation studies also validate the effectiveness of the CAN module in improving the image reconstruction quality. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06552v1-abstract-full').style.display = 'none'; document.getElementById('2411.06552v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05361">arXiv:2411.05361</a> <span> [<a href="https://arxiv.org/pdf/2411.05361">pdf</a>, <a href="https://arxiv.org/format/2411.05361">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Dynamic-SUPERB Phase-2: A Collaboratively Expanding Benchmark for Measuring the Capabilities of Spoken Language Models with 180 Tasks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Huang%2C+C">Chien-yu Huang</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wei-Chih Chen</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+S">Shu-wen Yang</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+A+T">Andy T. 
Liu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+C">Chen-An Li</a>, <a href="/search/eess?searchtype=author&query=Lin%2C+Y">Yu-Xiang Lin</a>, <a href="/search/eess?searchtype=author&query=Tseng%2C+W">Wei-Cheng Tseng</a>, <a href="/search/eess?searchtype=author&query=Diwan%2C+A">Anuj Diwan</a>, <a href="/search/eess?searchtype=author&query=Shih%2C+Y">Yi-Jen Shih</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+J">Jiatong Shi</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">William Chen</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+X">Xuanjun Chen</a>, <a href="/search/eess?searchtype=author&query=Hsiao%2C+C">Chi-Yuan Hsiao</a>, <a href="/search/eess?searchtype=author&query=Peng%2C+P">Puyuan Peng</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+S">Shih-Heng Wang</a>, <a href="/search/eess?searchtype=author&query=Kuan%2C+C">Chun-Yi Kuan</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+K">Ke-Han Lu</a>, <a href="/search/eess?searchtype=author&query=Chang%2C+K">Kai-Wei Chang</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+C">Chih-Kai Yang</a>, <a href="/search/eess?searchtype=author&query=Ritter-Gutierrez%2C+F">Fabian Ritter-Gutierrez</a>, <a href="/search/eess?searchtype=author&query=Chuang%2C+M+T">Ming To Chuang</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+K">Kuan-Po Huang</a>, <a href="/search/eess?searchtype=author&query=Arora%2C+S">Siddhant Arora</a>, <a href="/search/eess?searchtype=author&query=Lin%2C+Y">You-Kuan Lin</a>, <a href="/search/eess?searchtype=author&query=Yeo%2C+E">Eunjung Yeo</a> , et al. 
(53 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05361v1-abstract-short" style="display: inline;"> Multimodal foundation models, such as Gemini and ChatGPT, have revolutionized human-machine interactions by seamlessly integrating various forms of data. Developing a universal spoken language model that comprehends a wide range of natural language instructions is critical for bridging communication gaps and facilitating more intuitive interactions. However, the absence of a comprehensive evaluati… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05361v1-abstract-full').style.display = 'inline'; document.getElementById('2411.05361v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.05361v1-abstract-full" style="display: none;"> Multimodal foundation models, such as Gemini and ChatGPT, have revolutionized human-machine interactions by seamlessly integrating various forms of data. Developing a universal spoken language model that comprehends a wide range of natural language instructions is critical for bridging communication gaps and facilitating more intuitive interactions. However, the absence of a comprehensive evaluation benchmark poses a significant challenge. We present Dynamic-SUPERB Phase-2, an open and evolving benchmark for the comprehensive evaluation of instruction-based universal speech models. Building upon the first generation, this second version incorporates 125 new tasks contributed collaboratively by the global research community, expanding the benchmark to a total of 180 tasks, making it the largest benchmark for speech and audio evaluation. 
While the first generation of Dynamic-SUPERB was limited to classification tasks, Dynamic-SUPERB Phase-2 broadens its evaluation capabilities by introducing a wide array of novel and diverse tasks, including regression and sequence generation, across speech, music, and environmental audio. Evaluation results indicate that none of the models performed well universally. SALMONN-13B excelled in English ASR, while WavLLM demonstrated high accuracy in emotion recognition, but current models still require further innovations to handle a broader range of tasks. We will soon open-source all task data and the evaluation pipeline. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05361v1-abstract-full').style.display = 'none'; document.getElementById('2411.05361v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04672">arXiv:2411.04672</a> <span> [<a href="https://arxiv.org/pdf/2411.04672">pdf</a>, <a href="https://arxiv.org/format/2411.04672">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Semantic-Aware Resource Management for C-V2X Platooning via Multi-Agent Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Shao%2C+Z">Zhiyu Shao</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Q">Qiong Wu</a>, <a href="/search/eess?searchtype=author&query=Fan%2C+P">Pingyi Fan</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+K">Kezhi Wang</a>, <a href="/search/eess?searchtype=author&query=Fan%2C+Q">Qiang Fan</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wen Chen</a>, <a href="/search/eess?searchtype=author&query=Letaief%2C+K+B">Khaled B. Letaief</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04672v1-abstract-short" style="display: inline;"> This paper presents a semantic-aware multi-modal resource allocation (SAMRA) for multi-task using multi-agent reinforcement learning (MARL), termed SAMRAMARL, utilizing in platoon systems where cellular vehicle-to-everything (C-V2X) communication is employed. 
The proposed approach leverages the semantic information to optimize the allocation of communication resources. By integrating a distributed… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04672v1-abstract-full').style.display = 'inline'; document.getElementById('2411.04672v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04672v1-abstract-full" style="display: none;"> This paper presents a semantic-aware multi-modal resource allocation (SAMRA) for multi-task using multi-agent reinforcement learning (MARL), termed SAMRAMARL, utilizing in platoon systems where cellular vehicle-to-everything (C-V2X) communication is employed. The proposed approach leverages the semantic information to optimize the allocation of communication resources. By integrating a distributed multi-agent reinforcement learning (MARL) algorithm, SAMRAMARL enables autonomous decision-making for each vehicle, channel assignment optimization, power allocation, and semantic symbol length based on the contextual importance of the transmitted information. This semantic-awareness ensures that both vehicle-to-vehicle (V2V) and vehicle-to-infrastructure (V2I) communications prioritize data that is critical for maintaining safe and efficient platoon operations. The framework also introduces a tailored quality of experience (QoE) metric for semantic communication, aiming to maximize QoE in V2V links while improving the success rate of semantic information transmission (SRS). Extensive simulations have demonstrated that SAMRAMARL outperforms existing methods, achieving significant gains in QoE and communication efficiency in C-V2X platooning scenarios. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04672v1-abstract-full').style.display = 'none'; document.getElementById('2411.04672v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been submitted to IEEE Journal. The source code has been released at: https://github.com/qiongwu86/Semantic-Aware-Resource-Management-for-C-V2X-Platooning-via-Multi-Agent-Reinforcement-Learning</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04423">arXiv:2411.04423</a> <span> [<a href="https://arxiv.org/pdf/2411.04423">pdf</a>, <a href="https://arxiv.org/format/2411.04423">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Model Predictive Control Enabled UAV Trajectory Optimization and Secure Resource Allocation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+Z">Zhendong Li</a>, <a href="/search/eess?searchtype=author&query=Su%2C+C">Chang Su</a>, <a href="/search/eess?searchtype=author&query=Su%2C+Z">Zhou Su</a>, <a href="/search/eess?searchtype=author&query=Peng%2C+H">Haixia Peng</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yuntao Wang</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wen Chen</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Q">Qingqing Wu</a> </p> <p class="abstract mathjax"> 
<span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04423v1-abstract-short" style="display: inline;"> In this paper, we investigate a secure communication architecture based on unmanned aerial vehicle (UAV), which enhances the security performance of the communication system through UAV trajectory optimization. We formulate a control problem of minimizing the UAV flight path and power consumption while maximizing secure communication rate over infinite horizon by jointly optimizing UAV trajectory,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04423v1-abstract-full').style.display = 'inline'; document.getElementById('2411.04423v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04423v1-abstract-full" style="display: none;"> In this paper, we investigate a secure communication architecture based on unmanned aerial vehicle (UAV), which enhances the security performance of the communication system through UAV trajectory optimization. We formulate a control problem of minimizing the UAV flight path and power consumption while maximizing secure communication rate over infinite horizon by jointly optimizing UAV trajectory, transmit beamforming vector, and artificial noise (AN) vector. Given the non-uniqueness of optimization objective and significant coupling of the optimization variables, the problem is a non-convex optimization problem which is difficult to solve directly. To address this complex issue, an alternating-iteration technique is employed to decouple the optimization variables. Specifically, the problem is divided into three subproblems, i.e., UAV trajectory, transmit beamforming vector, and AN vector, which are solved alternately. 
Additionally, considering the susceptibility of UAV trajectory to disturbances, the model predictive control (MPC) approach is applied to obtain UAV trajectory and enhance the system robustness. Numerical results demonstrate the superiority of the proposed optimization algorithm in maintaining accurate UAV trajectory and high secure communication rate compared with other benchmark schemes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04423v1-abstract-full').style.display = 'none'; document.getElementById('2411.04423v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04419">arXiv:2411.04419</a> <span> [<a href="https://arxiv.org/pdf/2411.04419">pdf</a>, <a href="https://arxiv.org/format/2411.04419">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Joint Discrete Antenna Positioning and Beamforming Optimization in Movable Antenna Enabled Full-Duplex ISAC Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+Z">Zhendong Li</a>, <a href="/search/eess?searchtype=author&query=Ba%2C+J">Jianle Ba</a>, <a href="/search/eess?searchtype=author&query=Su%2C+Z">Zhou Su</a>, <a href="/search/eess?searchtype=author&query=Peng%2C+H">Haixia Peng</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yuntao Wang</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wen Chen</a>, <a 
href="/search/eess?searchtype=author&query=Wu%2C+Q">Qingqing Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04419v1-abstract-short" style="display: inline;"> In this paper, we propose a full-duplex integrated sensing and communication (ISAC) system enabled by a movable antenna (MA). By leveraging the characteristic of MA that can increase the spatial diversity gain, the performance of the system can be enhanced. We formulate a problem of minimizing the total transmit power consumption via jointly optimizing the discrete position of MA elements, beamfor… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04419v1-abstract-full').style.display = 'inline'; document.getElementById('2411.04419v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04419v1-abstract-full" style="display: none;"> In this paper, we propose a full-duplex integrated sensing and communication (ISAC) system enabled by a movable antenna (MA). By leveraging the characteristic of MA that can increase the spatial diversity gain, the performance of the system can be enhanced. We formulate a problem of minimizing the total transmit power consumption via jointly optimizing the discrete position of MA elements, beamforming vectors, sensing signal covariance matrix and user transmit power. Given the significant coupling of optimization variables, the formulated problem presents a non-convex optimization challenge that poses difficulties for direct resolution. To address this challenging issue, the discrete binary particle swarm optimization (BPSO) algorithm framework is employed to solve the formulated problem. Specifically, the discrete positions of MA elements are first obtained by iteratively solving the fitness function. 
The difference-of-convex (DC) programming and successive convex approximation (SCA) are used to handle non-convex and rank-1 terms in the fitness function. Once the BPSO iteration is complete, the discrete positions of MA elements can be determined, and we can obtain the solutions for beamforming vectors, sensing signal covariance matrix and user transmit power. Numerical results demonstrate the superiority of the proposed system in reducing the total transmit power consumption compared with fixed antenna arrays. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04419v1-abstract-full').style.display = 'none'; document.getElementById('2411.04419v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.00373">arXiv:2411.00373</a> <span> [<a href="https://arxiv.org/pdf/2411.00373">pdf</a>, <a href="https://arxiv.org/format/2411.00373">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Discrete RIS Enhanced Space Shift Keying MIMO System via Reflecting Beamforming Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhu%2C+X">Xusheng Zhu</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Q">Qingqing Wu</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wen Chen</a>, <a 
href="/search/eess?searchtype=author&query=He%2C+X">Xinyuan He</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+L">Lexi Xu</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Y">Yaxin Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.00373v1-abstract-short" style="display: inline;"> In this paper, a discrete reconfigurable intelligent surface (RIS)-assisted spatial shift keying (SSK) multiple-input multiple-output (MIMO) scheme is investigated, in which a direct link between the transmitter and the receiver is considered. To improve the reliability of the RIS-SSK-MIMO scheme, we formulate an objective function based on minimizing the average bit error probability (ABEP). Sinc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00373v1-abstract-full').style.display = 'inline'; document.getElementById('2411.00373v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.00373v1-abstract-full" style="display: none;"> In this paper, a discrete reconfigurable intelligent surface (RIS)-assisted spatial shift keying (SSK) multiple-input multiple-output (MIMO) scheme is investigated, in which a direct link between the transmitter and the receiver is considered. To improve the reliability of the RIS-SSK-MIMO scheme, we formulate an objective function based on minimizing the average bit error probability (ABEP). Since the reflecting phase shift of RIS is discrete, it is difficult to address this problem directly. To this end, we optimize the RIS phase shift to maximize the Euclidean distance between the minimum constellations by applying the successive convex approximation (SCA) and penalty-alternating optimization method. 
Simulation results verify the superiority of the proposed RIS-SSK-MIMO scheme and demonstrate the impact of the number of RIS elements, the number of phase quantization bits, and the number of receive and transmit antennas in terms of reliability. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00373v1-abstract-full').style.display = 'none'; document.getElementById('2411.00373v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.22987">arXiv:2410.22987</a> <span> [<a href="https://arxiv.org/pdf/2410.22987">pdf</a>, <a href="https://arxiv.org/format/2410.22987">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> V2X-Assisted Distributed Computing and Control Framework for Connected and Automated Vehicles under Ramp Merging Scenario </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wu%2C+Q">Qiong Wu</a>, <a href="/search/eess?searchtype=author&query=Chu%2C+J">Jiahou Chu</a>, <a href="/search/eess?searchtype=author&query=Fan%2C+P">Pingyi Fan</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+K">Kezhi Wang</a>, <a href="/search/eess?searchtype=author&query=Cheng%2C+N">Nan Cheng</a>, <a 
href="/search/eess?searchtype=author&query=Chen%2C+W">Wen Chen</a>, <a href="/search/eess?searchtype=author&query=Letaief%2C+K+B">Khaled B. Letaief</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.22987v1-abstract-short" style="display: inline;"> This paper investigates distributed computing and cooperative control of connected and automated vehicles (CAVs) in ramp merging scenario under transportation cyber-physical system. Firstly, a centralized cooperative trajectory planning problem is formulated subject to the safety constraints and traffic performance in ramp merging scenario, where the trajectories of all vehicles are jointly optimi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22987v1-abstract-full').style.display = 'inline'; document.getElementById('2410.22987v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.22987v1-abstract-full" style="display: none;"> This paper investigates distributed computing and cooperative control of connected and automated vehicles (CAVs) in ramp merging scenario under transportation cyber-physical system. Firstly, a centralized cooperative trajectory planning problem is formulated subject to the safety constraints and traffic performance in ramp merging scenario, where the trajectories of all vehicles are jointly optimized. To get rid of the reliance on a central controller and reduce computation time, a distributed solution to this problem implemented among CAVs through Vehicles-to-Everything (V2X) communication is proposed. Unlike existing methods, our method can distribute the computational task among CAVs and carry out parallel solving through V2X communication. 
Then, a multi-vehicle model predictive control (MPC) problem aimed at maximizing system stability and minimizing control input is formulated based on the solution of the first problem subject to strict safety constraints and input limits. Due to these complex constraints, this problem becomes high-dimensional, centralized, and non-convex. To solve it in a short time, a decomposition and convex reformulation method, namely distributed cooperative iterative model predictive control (DCIMPC), is proposed. This method leverages the communication capability of CAVs to decompose the problem, making full use of the computational resources on vehicles to achieve fast solutions and distributed control. The two above problems with their corresponding solving methods form the systemic framework of the V2X assisted distributed computing and control. Simulations have been conducted to evaluate the framework's convergence, safety, and solving speed. Additionally, extra experiments are conducted to validate the performance of DCIMPC. The results show that our method can greatly improve computation speed without sacrificing system performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22987v1-abstract-full').style.display = 'none'; document.getElementById('2410.22987v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been submitted to IEEE Journal. 
The source code has been released at: https://github.com/qiongwu86/V2X-Assisted-Distributed-Computing-and-Control-Framework-for-Connected-and-Automated-Vehicles.git</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.20690">arXiv:2410.20690</a> <span> [<a href="https://arxiv.org/pdf/2410.20690">pdf</a>, <a href="https://arxiv.org/format/2410.20690">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> KANsformer for Scalable Beamforming </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xie%2C+X">Xinke Xie</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+Y">Yang Lu</a>, <a href="/search/eess?searchtype=author&query=Chi%2C+C">Chong-Yung Chi</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wei Chen</a>, <a href="/search/eess?searchtype=author&query=Ai%2C+B">Bo Ai</a>, <a href="/search/eess?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.20690v1-abstract-short" style="display: inline;"> This paper proposes an unsupervised deep-learning (DL) approach by integrating transformer and Kolmogorov-Arnold networks (KAN) termed KANsformer to realize scalable beamforming for mobile communication systems. Specifically, we consider a classic multi-input-single-output energy efficiency maximization problem subject to the total power budget. 
The proposed KANsformer first extracts hidden featur… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20690v1-abstract-full').style.display = 'inline'; document.getElementById('2410.20690v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.20690v1-abstract-full" style="display: none;"> This paper proposes an unsupervised deep-learning (DL) approach by integrating transformer and Kolmogorov-Arnold networks (KAN) termed KANsformer to realize scalable beamforming for mobile communication systems. Specifically, we consider a classic multi-input-single-output energy efficiency maximization problem subject to the total power budget. The proposed KANsformer first extracts hidden features via a multi-head self-attention mechanism and then reads out the desired beamforming design via KAN. Numerical results are provided to evaluate the KANsformer in terms of generalization performance, transfer learning and ablation experiment. Overall, the KANsformer outperforms existing benchmark DL approaches, and is adaptable to the change in the number of mobile users with real-time and near-optimal inference. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20690v1-abstract-full').style.display = 'none'; document.getElementById('2410.20690v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.09503">arXiv:2410.09503</a> <span> [<a href="https://arxiv.org/pdf/2410.09503">pdf</a>, <a href="https://arxiv.org/format/2410.09503">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> SLAM-AAC: Enhancing Audio Captioning with Paraphrasing Augmentation and CLAP-Refine through LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wenxi Chen</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+Z">Ziyang Ma</a>, <a href="/search/eess?searchtype=author&query=Li%2C+X">Xiquan Li</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+X">Xuenan Xu</a>, <a href="/search/eess?searchtype=author&query=Liang%2C+Y">Yuzhe Liang</a>, <a href="/search/eess?searchtype=author&query=Zheng%2C+Z">Zhisheng Zheng</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+K">Kai Yu</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+X">Xie Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.09503v1-abstract-short" style="display: inline;"> Automated Audio Captioning (AAC) aims to generate natural textual descriptions for input audio signals. Recent progress in audio pre-trained models and large language models (LLMs) has significantly enhanced audio understanding and textual reasoning capabilities, making improvements in AAC possible. 
In this paper, we propose SLAM-AAC to further enhance AAC with paraphrasing augmentation and CLAP-R… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09503v1-abstract-full').style.display = 'inline'; document.getElementById('2410.09503v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.09503v1-abstract-full" style="display: none;"> Automated Audio Captioning (AAC) aims to generate natural textual descriptions for input audio signals. Recent progress in audio pre-trained models and large language models (LLMs) has significantly enhanced audio understanding and textual reasoning capabilities, making improvements in AAC possible. In this paper, we propose SLAM-AAC to further enhance AAC with paraphrasing augmentation and CLAP-Refine through LLMs. Our approach uses the self-supervised EAT model to extract fine-grained audio representations, which are then aligned with textual embeddings via lightweight linear layers. The caption generation LLM is efficiently fine-tuned using the LoRA adapter. Drawing inspiration from the back-translation method in machine translation, we implement paraphrasing augmentation to expand the Clotho dataset during pre-training. This strategy helps alleviate the limitation of scarce audio-text pairs and generates more diverse captions from a small set of audio clips. During inference, we introduce the plug-and-play CLAP-Refine strategy to fully exploit multiple decoding outputs, akin to the n-best rescoring strategy in speech recognition. Using the CLAP model for audio-text similarity calculation, we could select the textual descriptions generated by multiple searching beams that best match the input audio. Experimental results show that SLAM-AAC achieves state-of-the-art performance on Clotho V2 and AudioCaps, surpassing previous mainstream models. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09503v1-abstract-full').style.display = 'none'; document.getElementById('2410.09503v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.09472">arXiv:2410.09472</a> <span> [<a href="https://arxiv.org/pdf/2410.09472">pdf</a>, <a href="https://arxiv.org/format/2410.09472">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> DRCap: Decoding CLAP Latents with Retrieval-augmented Generation for Zero-shot Audio Captioning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+X">Xiquan Li</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wenxi Chen</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+Z">Ziyang Ma</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+X">Xuenan Xu</a>, <a href="/search/eess?searchtype=author&query=Liang%2C+Y">Yuzhe Liang</a>, <a href="/search/eess?searchtype=author&query=Zheng%2C+Z">Zhisheng Zheng</a>, <a href="/search/eess?searchtype=author&query=Kong%2C+Q">Qiuqiang Kong</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+X">Xie Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.09472v1-abstract-short" style="display: inline;"> While automated audio captioning (AAC) has made notable progress, traditional fully supervised AAC models still face two critical challenges: the need for expensive audio-text pair data for training and performance degradation when transferring across domains. To overcome these limitations, we present DRCap, a data-efficient and flexible zero-shot audio captioning system that requires text-only da… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09472v1-abstract-full').style.display = 'inline'; document.getElementById('2410.09472v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.09472v1-abstract-full" style="display: none;"> While automated audio captioning (AAC) has made notable progress, traditional fully supervised AAC models still face two critical challenges: the need for expensive audio-text pair data for training and performance degradation when transferring across domains. To overcome these limitations, we present DRCap, a data-efficient and flexible zero-shot audio captioning system that requires text-only data for training and can quickly adapt to new domains without additional fine-tuning. DRCap integrates a contrastive language-audio pre-training (CLAP) model and a large-language model (LLM) as its backbone. During training, the model predicts the ground-truth caption with a fixed text encoder from CLAP, whereas, during inference, the text encoder is replaced with the audio encoder to generate captions for audio clips in a zero-shot manner. To mitigate the modality gap of the CLAP model, we use both the projection strategy from the encoder side and the retrieval-augmented generation strategy from the decoder side. 
Specifically, audio embeddings are first projected onto a text embedding support to absorb extensive semantic information within the joint multi-modal space of CLAP. At the same time, similar captions retrieved from a datastore are fed as prompts to instruct the LLM, incorporating external knowledge to take full advantage of its strong generative capability. Conditioned on both the projected CLAP embedding and the retrieved similar captions, the model is able to produce a more accurate and semantically rich textual description. By tailoring the text embedding support and the caption datastore to the target domain, DRCap acquires a robust ability to adapt to new domains in a training-free manner. Experimental results demonstrate that DRCap outperforms all other zero-shot models in in-domain scenarios and achieves state-of-the-art performance in cross-domain scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09472v1-abstract-full').style.display = 'none'; document.getElementById('2410.09472v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.08459">arXiv:2410.08459</a> <span> [<a href="https://arxiv.org/pdf/2410.08459">pdf</a>, <a href="https://arxiv.org/format/2410.08459">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Beamforming Design for Intelligent Reflecting Surface Aided Near-Field THz Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Qiu%2C+C">Chi Qiu</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Q">Qingqing Wu</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wen Chen</a>, <a href="/search/eess?searchtype=author&query=Hua%2C+M">Meng Hua</a>, <a href="/search/eess?searchtype=author&query=Hao%2C+W">Wanming Hao</a>, <a href="/search/eess?searchtype=author&query=Jian%2C+M">Mengnan Jian</a>, <a href="/search/eess?searchtype=author&query=Hou%2C+F">Fen Hou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.08459v1-abstract-short" style="display: inline;"> Intelligent reflecting surface (IRS) operating in the terahertz (THz) band has recently gained considerable interest due to its high spectrum bandwidth. Due to the exploitation of large scale of IRS, there is a high probability that the transceivers will be situated within the near-field region of the IRS. 
Thus, the near-field beam split effect poses a major challenge for the design of wideband IR… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08459v1-abstract-full').style.display = 'inline'; document.getElementById('2410.08459v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.08459v1-abstract-full" style="display: none;"> Intelligent reflecting surface (IRS) operating in the terahertz (THz) band has recently gained considerable interest due to its high spectrum bandwidth. Due to the exploitation of large scale of IRS, there is a high probability that the transceivers will be situated within the near-field region of the IRS. Thus, the near-field beam split effect poses a major challenge for the design of wideband IRS beamforming, which causes the radiation beam to deviate from its intended location, leading to significant gain losses and limiting the efficient use of available bandwidths. While delay-based IRS has emerged as a potential solution, current beamforming schemes generally assume unbounded range time delays (TDs). In this letter, we first investigate the near-field beam split issue at the IRS. Then, we extend the piece-wise far-field model to the IRS, based on which, a double-layer delta-delay (DLDD) IRS beamforming scheme is proposed. Specifically, we employ an element-grouping strategy and the TD imposed on each sub-surface of IRS is achieved by a series of TD modules. This method significantly reduces the required range of TDs. Numerical results show that the proposed DLDD IRS beamforming scheme can effectively mitigate the near-field beam split and achieve near-optimal performance. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08459v1-abstract-full').style.display = 'none'; document.getElementById('2410.08459v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.05912">arXiv:2410.05912</a> <span> [<a href="https://arxiv.org/pdf/2410.05912">pdf</a>, <a href="https://arxiv.org/format/2410.05912">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Two-Timescale Design for Movable Antennas Enabled-Multiuser MIMO Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zheng%2C+Z">Ziyuan Zheng</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Q">Qingqing Wu</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wen Chen</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+G">Guojie Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.05912v1-abstract-short" style="display: inline;"> Movable antennas (MAs), which can be swiftly repositioned within a defined region, offer a promising solution to the limitations of fixed-position antennas (FPAs) in adapting to spatial variations in wireless channels, thereby improving channel conditions and communication between transceivers. 
However, frequent MA position adjustments based on instantaneous channel state information (CSI) incur h… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05912v1-abstract-full').style.display = 'inline'; document.getElementById('2410.05912v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.05912v1-abstract-full" style="display: none;"> Movable antennas (MAs), which can be swiftly repositioned within a defined region, offer a promising solution to the limitations of fixed-position antennas (FPAs) in adapting to spatial variations in wireless channels, thereby improving channel conditions and communication between transceivers. However, frequent MA position adjustments based on instantaneous channel state information (CSI) incur high operational complexity, making real-time CSI acquisition impractical, especially in fast-fading channels. To address these challenges, we propose a two-timescale transmission framework for MA-enabled multiuser multiple-input-multiple-output (MU-MIMO) systems. In the large timescale, statistical CSI is exploited to optimize MA positions for long-term ergodic performance, whereas, in the small timescale, beamforming vectors are designed using instantaneous CSI to handle short-term channel fluctuations. Within this new framework, we analyze the ergodic sum rate and develop efficient MA position optimization algorithms for both maximum-ratio-transmission (MRT) and zero-forcing (ZF) beamforming schemes. These algorithms employ alternating optimization (AO), successive convex approximation (SCA), and majorization-minimization (MM) techniques, iteratively optimizing antenna positions and refining surrogate functions that approximate the ergodic sum rate. 
Numerical results show significant ergodic sum rate gains with the proposed two-timescale MA design over conventional FPA systems, particularly under moderate to strong line-of-sight (LoS) conditions. Notably, MA with ZF beamforming consistently outperforms MA with MRT, highlighting the synergy between beamforming and MAs for superior interference management in environments with moderate Rician factors and high user density, while MA with MRT can offer a simplified alternative to complex beamforming designs in strong LoS conditions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05912v1-abstract-full').style.display = 'none'; document.getElementById('2410.05912v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 12 figures, submitted to an IEEE journal for possible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04414">arXiv:2410.04414</a> <span> [<a href="https://arxiv.org/pdf/2410.04414">pdf</a>, <a href="https://arxiv.org/format/2410.04414">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Spatial Multiplexing Oriented Channel Reconfiguration in Multi-IRS Aided MIMO Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/eess?searchtype=author&query=Chen%2C+Y">Yuxuan Chen</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Q">Qingqing Wu</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+G">Guangji Chen</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wen Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04414v1-abstract-short" style="display: inline;"> Spatial multiplexing plays a significant role in improving the capacity of multiple-input multiple-output (MIMO) communication systems. To improve the spectral efficiency (SE) of a point-to-point MIMO system, we exploit the channel reconfiguration capabilities provided by multiple intelligent reflecting surfaces (IRSs) to enhance the spatial multiplexing. Unlike most existing works, we address bot… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04414v1-abstract-full').style.display = 'inline'; document.getElementById('2410.04414v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04414v1-abstract-full" style="display: none;"> Spatial multiplexing plays a significant role in improving the capacity of multiple-input multiple-output (MIMO) communication systems. To improve the spectral efficiency (SE) of a point-to-point MIMO system, we exploit the channel reconfiguration capabilities provided by multiple intelligent reflecting surfaces (IRSs) to enhance the spatial multiplexing. Unlike most existing works, we address both the issues of the IRSs placement and elements allocation. To this end, we first introduce an orthogonal placement strategy to mitigate channel correlation, thereby enabling interference-free multi-stream transmission. 
Subsequently, we propose a successive convex approximation (SCA)-based approach to jointly optimize the IRS elements and power allocation. Our theoretical analysis unveils that equal IRS elements/power allocation scheme becomes asymptotically optimal as the number of IRS elements and transmit power tend to be infinite. Numerical results demonstrate that when the total number of IRS elements or the power exceeds a certain threshold, a multi-IRS assisted system outperforms a single IRS configuration. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04414v1-abstract-full').style.display = 'none'; document.getElementById('2410.04414v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.03680">arXiv:2410.03680</a> <span> [<a href="https://arxiv.org/pdf/2410.03680">pdf</a>, <a href="https://arxiv.org/format/2410.03680">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Leafeon: Towards Accurate, Robust and Low-cost Leaf Water Content Sensing Using mmWave Radar </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Cardamis%2C+M">Mark Cardamis</a>, <a href="/search/eess?searchtype=author&query=Jia%2C+H">Hong Jia</a>, <a href="/search/eess?searchtype=author&query=Qian%2C+H">Hao Qian</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wenyao Chen</a>, <a href="/search/eess?searchtype=author&query=Yan%2C+Y">Yihe Yan</a>, <a 
href="/search/eess?searchtype=author&query=Ghannoum%2C+O">Oula Ghannoum</a>, <a href="/search/eess?searchtype=author&query=Quigley%2C+A">Aaron Quigley</a>, <a href="/search/eess?searchtype=author&query=Chou%2C+C+T">Chung Tung Chou</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+W">Wen Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.03680v1-abstract-short" style="display: inline;"> Plant sensing plays an important role in modern smart agriculture and the farming industry. Remote radio sensing allows for monitoring essential indicators of plant health, such as leaf water content. While recent studies have shown the potential of using millimeter-wave (mmWave) radar for plant sensing, many overlook crucial factors such as leaf structure and surface roughness, which can impact t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03680v1-abstract-full').style.display = 'inline'; document.getElementById('2410.03680v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.03680v1-abstract-full" style="display: none;"> Plant sensing plays an important role in modern smart agriculture and the farming industry. Remote radio sensing allows for monitoring essential indicators of plant health, such as leaf water content. While recent studies have shown the potential of using millimeter-wave (mmWave) radar for plant sensing, many overlook crucial factors such as leaf structure and surface roughness, which can impact the accuracy of the measurements. In this paper, we introduce Leafeon, which leverages mmWave radar to measure leaf water content non-invasively. 
Utilizing electronic beam steering, multiple leaf perspectives are sent to a custom deep neural network, which discerns unique reflection patterns from subtle antenna variations, ensuring accurate and robust leaf water content estimations. We implement a prototype of Leafeon using a Commercial Off-The-Shelf mmWave radar and evaluate its performance with a variety of different leaf types. Leafeon was trained in-lab using high-resolution destructive leaf measurements, achieving a Mean Absolute Error (MAE) of leaf water content as low as 3.17% for the Avocado leaf, significantly outperforming the state-of-the-art approaches with an MAE reduction of up to 55.7%. Furthermore, we conducted experiments on live plants in both indoor and glasshouse experimental farm environments (see Fig. 1). Our results showed a strong correlation between predicted leaf water content levels and drought events. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03680v1-abstract-full').style.display = 'none'; document.getElementById('2410.03680v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.03243">arXiv:2410.03243</a> <span> [<a href="https://arxiv.org/pdf/2410.03243">pdf</a>, <a href="https://arxiv.org/format/2410.03243">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Towards TMA-Based Transmissive RIS Transceiver Enabled Downlink Communication Networks: A Consensus-ADMM Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+Z">Zhendong Li</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wen Chen</a>, <a href="/search/eess?searchtype=author&query=Qin%2C+H">Haoran Qin</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Q">Qingqing Wu</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+X">Xusheng Zhu</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Z">Ziheng Zhang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+J">Jun Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.03243v1-abstract-short" style="display: inline;"> This paper presents a novel multi-stream downlink communication system that utilizes a transmissive reconfigurable intelligent surface (RIS) transceiver. Specifically, we elaborate the downlink communication scheme using time-modulated array (TMA) technology, which enables high order modulation and multi-stream beamforming. 
Then, an optimization problem is formulated to maximize the minimum signal… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03243v1-abstract-full').style.display = 'inline'; document.getElementById('2410.03243v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.03243v1-abstract-full" style="display: none;"> This paper presents a novel multi-stream downlink communication system that utilizes a transmissive reconfigurable intelligent surface (RIS) transceiver. Specifically, we elaborate the downlink communication scheme using time-modulated array (TMA) technology, which enables high order modulation and multi-stream beamforming. Then, an optimization problem is formulated to maximize the minimum signal-to-interference-plus-noise ratio (SINR) with user fairness, which takes into account the constraint of the maximum available power for each transmissive element. Due to the non-convex nature of the formulated problem, finding the optimal solution is challenging. To mitigate the complexity, we propose a linear-complexity beamforming algorithm based on consensus alternating direction method of multipliers (ADMM). Specifically, by introducing a set of auxiliary variables, the problem can be decomposed into multiple sub-problems that are amenable to parallel computation, where each sub-problem can yield closed-form expressions, bringing a significant reduction in the computational complexity. The overall problem achieves convergence by iteratively addressing these sub-problems in an alternating manner. Finally, the convergence of the proposed algorithm and the impact of various parameter configurations on the system performance are validated through numerical simulations. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03243v1-abstract-full').style.display = 'none'; document.getElementById('2410.03243v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE TCOM 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02289">arXiv:2410.02289</a> <span> [<a href="https://arxiv.org/pdf/2410.02289">pdf</a>, <a href="https://arxiv.org/format/2410.02289">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Model-Based GNN Enabled Energy-Efficient Beamforming for Ultra-Dense Wireless Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhang%2C+R">Rongsheng Zhang</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+Y">Yang Lu</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wei Chen</a>, <a href="/search/eess?searchtype=author&query=Ai%2C+B">Bo Ai</a>, <a href="/search/eess?searchtype=author&query=Ding%2C+Z">Zhiguo Ding</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.02289v1-abstract-short" style="display: inline;"> This paper investigates deep learning enabled beamforming design for ultra-dense wireless networks by integrating prior knowledge and graph neural network (GNN), named model-based GNN. 
An energy efficiency (EE) maximization problem is formulated subject to power budget and quality of service (QoS) requirements, which is reformulated based on the minimum mean square error scheme and the hybrid zero-… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02289v1-abstract-full').style.display = 'inline'; document.getElementById('2410.02289v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.02289v1-abstract-full" style="display: none;"> This paper investigates deep learning enabled beamforming design for ultra-dense wireless networks by integrating prior knowledge and graph neural network (GNN), named model-based GNN. An energy efficiency (EE) maximization problem is formulated subject to power budget and quality of service (QoS) requirements, which is reformulated based on the minimum mean square error scheme and the hybrid zero-forcing and maximum ratio transmission schemes. Based on the reformulated problem, the model-based GNN is designed to realize the mapping from channel state information to beamforming vectors. In particular, the multi-head attention mechanism and residual connection are adopted to enhance the feature extraction, and a scheme selection module is designed to improve the adaptability of GNN. The unsupervised learning is adopted, and a various-input training strategy is proposed to enhance the stability of GNN. Numerical results demonstrate the millisecond-level response with limited performance loss, the scalability to different users and the adaptability to various channel conditions and QoS requirements of the model-based GNN in ultra-dense wireless networks. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02289v1-abstract-full').style.display = 'none'; document.getElementById('2410.02289v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02277">arXiv:2410.02277</a> <span> [<a href="https://arxiv.org/pdf/2410.02277">pdf</a>, <a href="https://arxiv.org/format/2410.02277">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> GNN-Enabled Optimization of Placement and Transmission Design for UAV Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+Q">Qinyu Wang</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+Y">Yang Lu</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wei Chen</a>, <a href="/search/eess?searchtype=author&query=Ai%2C+B">Bo Ai</a>, <a href="/search/eess?searchtype=author&query=Zhong%2C+Z">Zhangdui Zhong</a>, <a href="/search/eess?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.02277v1-abstract-short" style="display: inline;"> This paper applies graph neural networks (GNN) in UAV communications to optimize the placement and transmission design. 
We consider a multiple-user multiple-input-single-output UAV communication system where a UAV intends to find a placement to hover and serve users with maximum energy efficiency (EE). To facilitate the GNN-based learning, we adopt the hybrid maximum ratio transmission and zero fo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02277v1-abstract-full').style.display = 'inline'; document.getElementById('2410.02277v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.02277v1-abstract-full" style="display: none;"> This paper applies graph neural networks (GNN) in UAV communications to optimize the placement and transmission design. We consider a multiple-user multiple-input-single-output UAV communication system where a UAV intends to find a placement to hover and serve users with maximum energy efficiency (EE). To facilitate the GNN-based learning, we adopt the hybrid maximum ratio transmission and zero forcing scheme to design the beamforming vectors and a feature augment is implemented by manually setting edge features. Furthermore, we propose a two-stage GNN-based model where the first stage and the second stage yield the placement and the transmission design, respectively. The two stages are connected via a residual and their learnable weights are jointly optimized via unsupervised learning. Numerical results illustrate the effectiveness and validate the scalability to both UAV antennas and users of the proposed model. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02277v1-abstract-full').style.display = 'none'; document.getElementById('2410.02277v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.20485">arXiv:2409.20485</a> <span> [<a href="https://arxiv.org/pdf/2409.20485">pdf</a>, <a href="https://arxiv.org/format/2409.20485">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Movable Antennas Enabled Wireless-Powered NOMA: Continuous and Discrete Positioning Designs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Gao%2C+Y">Ying Gao</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Q">Qingqing Wu</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wen Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.20485v1-abstract-short" style="display: inline;"> This paper investigates a movable antenna (MA)-enabled wireless-powered communication network (WPCN), where multiple wireless devices (WDs) first harvest energy from the downlink (DL) signal broadcast by a hybrid access point (HAP) and then transmit information in the uplink (UL) using non-orthogonal multiple access. 
Unlike conventional WPCNs with fixed-position antennas (FPAs), this MA-enabled WP… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.20485v1-abstract-full').style.display = 'inline'; document.getElementById('2409.20485v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.20485v1-abstract-full" style="display: none;"> This paper investigates a movable antenna (MA)-enabled wireless-powered communication network (WPCN), where multiple wireless devices (WDs) first harvest energy from the downlink (DL) signal broadcast by a hybrid access point (HAP) and then transmit information in the uplink (UL) using non-orthogonal multiple access. Unlike conventional WPCNs with fixed-position antennas (FPAs), this MA-enabled WPCN allows the MAs at the HAP and the WDs to adjust their positions twice: once before DL wireless power transfer and once before DL wireless information transmission. Our goal is to maximize the system sum throughput by jointly optimizing the MA positions, the time allocation, and the UL power allocation. Considering the characteristics of antenna movement, we explore both continuous and discrete positioning designs, which, after formulation, are found to be non-convex optimization problems. Before tackling these problems, we rigorously prove that using identical MA positions for both DL and UL is the optimal strategy in both scenarios, thereby greatly simplifying the problems and enabling easier practical implementation of the system. We then propose alternating optimization-based algorithms for the resulting simplified problems. 
Simulation results show that: 1) the proposed continuous MA scheme can enhance the sum throughput by up to 395.71% compared to the benchmark with FPAs, even when additional compensation transmission time is provided to the latter; 2) a step size of one-quarter wavelength for the MA motion driver is generally sufficient for the proposed discrete MA scheme to achieve over 80% of the sum throughput performance of the continuous MA scheme; 3) when each moving region is large enough to include multiple optimal positions for the continuous MA scheme, the discrete MA scheme can achieve comparable sum throughput without requiring an excessively small step size. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.20485v1-abstract-full').style.display = 'none'; document.getElementById('2409.20485v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 10 figures (subfigures included), submitted to an IEEE journal for possible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.18828">arXiv:2409.18828</a> <span> [<a href="https://arxiv.org/pdf/2409.18828">pdf</a>, <a href="https://arxiv.org/format/2409.18828">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> MECG-E: Mamba-based ECG Enhancer for Baseline Wander Removal </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hung%2C+K">Kuo-Hsuan Hung</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+K">Kuan-Chen Wang</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+K">Kai-Chun Liu</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wei-Lun Chen</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+X">Xugang Lu</a>, <a href="/search/eess?searchtype=author&query=Tsao%2C+Y">Yu Tsao</a>, <a href="/search/eess?searchtype=author&query=Lin%2C+C">Chii-Wann Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.18828v2-abstract-short" style="display: inline;"> Electrocardiogram (ECG) is an important non-invasive method for diagnosing cardiovascular disease. However, ECG signals are susceptible to noise contamination, such as electrical interference or signal wandering, which reduces diagnostic accuracy. 
Various ECG denoising methods have been proposed, but most existing methods yield suboptimal performance under very noisy conditions or require several… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.18828v2-abstract-full').style.display = 'inline'; document.getElementById('2409.18828v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.18828v2-abstract-full" style="display: none;"> Electrocardiogram (ECG) is an important non-invasive method for diagnosing cardiovascular disease. However, ECG signals are susceptible to noise contamination, such as electrical interference or signal wandering, which reduces diagnostic accuracy. Various ECG denoising methods have been proposed, but most existing methods yield suboptimal performance under very noisy conditions or require several steps during inference, leading to latency during online processing. In this paper, we propose a novel ECG denoising model, namely Mamba-based ECG Enhancer (MECG-E), which leverages the Mamba architecture known for its fast inference and outstanding nonlinear mapping capabilities. Experimental results indicate that MECG-E surpasses several well-known existing models across multiple metrics under different noise conditions. Additionally, MECG-E requires less inference time than state-of-the-art diffusion-based ECG denoisers, demonstrating the model's functionality and efficiency. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.18828v2-abstract-full').style.display = 'none'; document.getElementById('2409.18828v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at IEEE BigData 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.15897">arXiv:2409.15897</a> <span> [<a href="https://arxiv.org/pdf/2409.15897">pdf</a>, <a href="https://arxiv.org/ps/2409.15897">ps</a>, <a href="https://arxiv.org/format/2409.15897">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> ESPnet-Codec: Comprehensive Training and Evaluation of Neural Codecs for Audio, Music, and Speech </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Shi%2C+J">Jiatong Shi</a>, <a href="/search/eess?searchtype=author&query=Tian%2C+J">Jinchuan Tian</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Y">Yihan Wu</a>, <a href="/search/eess?searchtype=author&query=Jung%2C+J">Jee-weon Jung</a>, <a href="/search/eess?searchtype=author&query=Yip%2C+J+Q">Jia Qi Yip</a>, <a href="/search/eess?searchtype=author&query=Masuyama%2C+Y">Yoshiki Masuyama</a>, <a 
href="/search/eess?searchtype=author&query=Chen%2C+W">William Chen</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Y">Yuning Wu</a>, <a href="/search/eess?searchtype=author&query=Tang%2C+Y">Yuxun Tang</a>, <a href="/search/eess?searchtype=author&query=Baali%2C+M">Massa Baali</a>, <a href="/search/eess?searchtype=author&query=Alharhi%2C+D">Dareen Alharhi</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+D">Dong Zhang</a>, <a href="/search/eess?searchtype=author&query=Deng%2C+R">Ruifan Deng</a>, <a href="/search/eess?searchtype=author&query=Srivastava%2C+T">Tejes Srivastava</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+H">Haibin Wu</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+A+H">Alexander H. Liu</a>, <a href="/search/eess?searchtype=author&query=Raj%2C+B">Bhiksha Raj</a>, <a href="/search/eess?searchtype=author&query=Jin%2C+Q">Qin Jin</a>, <a href="/search/eess?searchtype=author&query=Song%2C+R">Ruihua Song</a>, <a href="/search/eess?searchtype=author&query=Watanabe%2C+S">Shinji Watanabe</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.15897v1-abstract-short" style="display: inline;"> Neural codecs have become crucial to recent speech and audio generation research. In addition to signal compression capabilities, discrete codecs have also been found to enhance downstream training efficiency and compatibility with autoregressive language models. 
However, as extensive downstream applications are investigated, challenges have arisen in ensuring fair comparisons across diverse appli… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15897v1-abstract-full').style.display = 'inline'; document.getElementById('2409.15897v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.15897v1-abstract-full" style="display: none;"> Neural codecs have become crucial to recent speech and audio generation research. In addition to signal compression capabilities, discrete codecs have also been found to enhance downstream training efficiency and compatibility with autoregressive language models. However, as extensive downstream applications are investigated, challenges have arisen in ensuring fair comparisons across diverse applications. To address these issues, we present a new open-source platform ESPnet-Codec, which is built on ESPnet and focuses on neural codec training and evaluation. ESPnet-Codec offers various recipes in audio, music, and speech for training and evaluation using several widely adopted codec models. Together with ESPnet-Codec, we present VERSA, a standalone evaluation toolkit, which provides a comprehensive evaluation of codec performance over 20 audio evaluation metrics. Notably, we demonstrate that ESPnet-Codec can be integrated into six ESPnet tasks, supporting diverse applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15897v1-abstract-full').style.display = 'none'; document.getElementById('2409.15897v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by SLT</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.15711">arXiv:2409.15711</a> <span> [<a href="https://arxiv.org/pdf/2409.15711">pdf</a>, <a href="https://arxiv.org/format/2409.15711">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Adversarial Federated Consensus Learning for Surface Defect Classification Under Data Heterogeneity in IIoT </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Cui%2C+J">Jixuan Cui</a>, <a href="/search/eess?searchtype=author&query=Li%2C+J">Jun Li</a>, <a href="/search/eess?searchtype=author&query=Mei%2C+Z">Zhen Mei</a>, <a href="/search/eess?searchtype=author&query=Ni%2C+Y">Yiyang Ni</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wen Chen</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Z">Zengxiang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.15711v2-abstract-short" style="display: inline;"> The challenge of data scarcity hinders the application of deep learning in industrial surface defect classification (SDC), as it's difficult to collect and centralize sufficient training data from various entities in Industrial Internet of Things (IIoT) due to privacy concerns. 
Federated learning (FL) provides a solution by enabling collaborative global model training across clients while maintain… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15711v2-abstract-full').style.display = 'inline'; document.getElementById('2409.15711v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.15711v2-abstract-full" style="display: none;"> The challenge of data scarcity hinders the application of deep learning in industrial surface defect classification (SDC), as it's difficult to collect and centralize sufficient training data from various entities in Industrial Internet of Things (IIoT) due to privacy concerns. Federated learning (FL) provides a solution by enabling collaborative global model training across clients while maintaining privacy. However, performance may suffer due to data heterogeneity-discrepancies in data distributions among clients. In this paper, we propose a novel personalized FL (PFL) approach, named Adversarial Federated Consensus Learning (AFedCL), for the challenge of data heterogeneity across different clients in SDC. First, we develop a dynamic consensus construction strategy to mitigate the performance degradation caused by data heterogeneity. Through adversarial training, local models from different clients utilize the global model as a bridge to achieve distribution alignment, alleviating the problem of global knowledge forgetting. Complementing this strategy, we propose a consensus-aware aggregation mechanism. It assigns aggregation weights to different clients based on their efficacy in global knowledge learning, thereby enhancing the global model's generalization capabilities. Finally, we design an adaptive feature fusion module to further enhance global knowledge utilization efficiency. Personalized fusion weights are gradually adjusted for each client to optimally balance global and local features. 
Compared with state-of-the-art FL methods like FedALA, the proposed AFedCL method achieves an accuracy increase of up to 5.67% on three SDC datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15711v2-abstract-full').style.display = 'none'; document.getElementById('2409.15711v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.14829">arXiv:2409.14829</a> <span> [<a href="https://arxiv.org/pdf/2409.14829">pdf</a>, <a href="https://arxiv.org/format/2409.14829">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> RoWSFormer: A Robust Watermarking Framework with Swin Transformer for Enhanced Geometric Attack Resilience </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+W">Weitong Chen</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yuheng Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.14829v1-abstract-short" style="display: inline;"> In recent years, digital 
watermarking techniques based on deep learning have been widely studied. To achieve both imperceptibility and robustness of image watermarks, most current methods employ convolutional neural networks to build robust watermarking frameworks. However, despite the success of CNN-based watermarking models, they struggle to achieve robustness against geometric attacks due to th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14829v1-abstract-full').style.display = 'inline'; document.getElementById('2409.14829v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.14829v1-abstract-full" style="display: none;"> In recent years, digital watermarking techniques based on deep learning have been widely studied. To achieve both imperceptibility and robustness of image watermarks, most current methods employ convolutional neural networks to build robust watermarking frameworks. However, despite the success of CNN-based watermarking models, they struggle to achieve robustness against geometric attacks due to the limitations of convolutional neural networks in capturing global and long-range relationships. To address this limitation, we propose a robust watermarking framework based on the Swin Transformer, named RoWSFormer. Specifically, we design the Locally-Channel Enhanced Swin Transformer Block as the core of both the encoder and decoder. This block utilizes the self-attention mechanism to capture global and long-range information, thereby significantly improving adaptation to geometric distortions. Additionally, we construct the Frequency-Enhanced Transformer Block to extract frequency domain information, which further strengthens the robustness of the watermarking framework. Experimental results demonstrate that our RoWSFormer surpasses existing state-of-the-art watermarking methods. 
For most non-geometric attacks, RoWSFormer improves the PSNR by 3 dB while maintaining the same extraction accuracy. In the case of geometric attacks (such as rotation, scaling, and affine transformations), RoWSFormer achieves over a 6 dB improvement in PSNR, with extraction accuracy exceeding 97\%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14829v1-abstract-full').style.display = 'none'; document.getElementById('2409.14829v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.14739">arXiv:2409.14739</a> <span> [<a href="https://arxiv.org/pdf/2409.14739">pdf</a>, <a href="https://arxiv.org/format/2409.14739">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> AmpAgent: An LLM-based Multi-Agent System for Multi-stage Amplifier Schematic Design from Literature for Process and Performance Porting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Liu%2C+C">Chengjie Liu</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Weiyu Chen</a>, <a href="/search/eess?searchtype=author&query=Peng%2C+A">Anlan Peng</a>, <a href="/search/eess?searchtype=author&query=Du%2C+Y">Yuan Du</a>, <a href="/search/eess?searchtype=author&query=Du%2C+L">Li Du</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+J">Jun Yang</a> </p> <p 
class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.14739v1-abstract-short" style="display: inline;"> Multi-stage amplifiers are widely applied in analog circuits. However, their large number of components, complex transfer functions, and intricate pole-zero distributions necessitate extensive manpower for derivation and param sizing to ensure their stability. In order to achieve efficient derivation of the transfer function and simplify the difficulty of circuit design, we propose AmpAgent: a mul… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14739v1-abstract-full').style.display = 'inline'; document.getElementById('2409.14739v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.14739v1-abstract-full" style="display: none;"> Multi-stage amplifiers are widely applied in analog circuits. However, their large number of components, complex transfer functions, and intricate pole-zero distributions necessitate extensive manpower for derivation and param sizing to ensure their stability. In order to achieve efficient derivation of the transfer function and simplify the difficulty of circuit design, we propose AmpAgent: a multi-agent system based on large language models (LLMs) for efficiently designing such complex amplifiers from literature with process and performance porting. AmpAgent is composed of three agents: Literature Analysis Agent, Mathematics Reasoning Agent and Device Sizing Agent. They are separately responsible for retrieving key information (e.g. formulas and transfer functions) from the literature, decompose the whole circuit's design problem by deriving the key formulas, and address the decomposed problem iteratively. 
AmpAgent was employed in the schematic design of seven types of multi-stage amplifiers with different compensation techniques. In terms of design efficiency, AmpAgent has reduced the number of iterations by 1.32$ \sim $4${\times}$ and execution time by 1.19$ \sim $2.99${\times}$ compared to conventional optimization algorithms, with a success rate increased by 1.03$ \sim $6.79${\times}$. In terms of circuit performance, it has improved by 1.63$ \sim $27.25${\times}$ compared to the original literature. The findings suggest that LLMs could play a crucial role in the field of complex analog circuit schematic design, as well as process and performance porting. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14739v1-abstract-full').style.display = 'none'; document.getElementById('2409.14739v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.14441">arXiv:2409.14441</a> <span> [<a href="https://arxiv.org/pdf/2409.14441">pdf</a>, <a href="https://arxiv.org/format/2409.14441">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> BUPTCMCC-6G-CMG+: A GBSM-Based ISAC Standard Channel Model Generator </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhao%2C+C">Changsheng Zhao</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+J">Jianhua Zhang</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Y">Yuxiang Zhang</a>, <a href="/search/eess?searchtype=author&query=Tian%2C+L">Lei Tian</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+H">Heng Wang</a>, <a href="/search/eess?searchtype=author&query=Jiang%2C+H">Hanyuan Jiang</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Y">Yameng Liu</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wenjun Chen</a>, <a href="/search/eess?searchtype=author&query=Jiang%2C+T">Tao Jiang</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+G">Guangyi Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.14441v3-abstract-short" style="display: inline;"> Integrated sensing and communication (ISAC) has been recognized as the key technology in the vision of the sixth generation (6G) era. With the emergence of new concepts in mobile communications, the channel model is the prerequisite for system design and performance evaluation. Currently, 3GPP Release 19 is advancing the standardization of ISAC channel models. 
Nevertheless, a unified modeling fram… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14441v3-abstract-full').style.display = 'inline'; document.getElementById('2409.14441v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.14441v3-abstract-full" style="display: none;"> Integrated sensing and communication (ISAC) has been recognized as the key technology in the vision of the sixth generation (6G) era. With the emergence of new concepts in mobile communications, the channel model is the prerequisite for system design and performance evaluation. Currently, 3GPP Release 19 is advancing the standardization of ISAC channel models. Nevertheless, a unified modeling framework has yet to be established. This paper provides a simulation diagram of ISAC channel modeling extended based on the Geometry-Based Stochastic Model (GBSM), which is compatible with existing 5G channel models and the latest progress in 3GPP standardization. We first introduce the progress of the ISAC channel model standard in general. Then a concatenation channel modeling approach considering team standardization proposals is presented which is implemented on the BUPTCMCC-6G-CMG+ channel model generator. We validated the model in cumulative probability density function (CDF) in statistical extension of angle and delay, and radar cross section (RCS). Simulation results show that the proposed model can realistically characterize the feature of concatenation and RCS within the ISAC channel. At the same time, the proposed method maintains an efficient simulation. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14441v3-abstract-full').style.display = 'none'; document.getElementById('2409.14441v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages,4 fiures,4 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.13943">arXiv:2409.13943</a> <span> [<a href="https://arxiv.org/pdf/2409.13943">pdf</a>, <a href="https://arxiv.org/ps/2409.13943">ps</a>, <a href="https://arxiv.org/format/2409.13943">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> QoS-Aware and Routing-Flexible Network Slicing for Service-Oriented Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wei-Kun Chen</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Y">Ya-Feng Liu</a>, <a href="/search/eess?searchtype=author&query=Dai%2C+Y">Yu-Hong Dai</a>, <a href="/search/eess?searchtype=author&query=Luo%2C+Z">Zhi-Quan Luo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.13943v1-abstract-short" style="display: inline;"> In this paper, we consider the network slicing (NS) problem which attempts to map multiple customized virtual network requests (also called services) to a common shared network infrastructure and manage network resources to meet diverse quality of service (QoS) requirements. We propose a mixed-integer nonlinear programming (MINLP) formulation for the considered NS problem that can flexibly route t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13943v1-abstract-full').style.display = 'inline'; document.getElementById('2409.13943v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.13943v1-abstract-full" style="display: none;"> In this paper, we consider the network slicing (NS) problem which attempts to map multiple customized virtual network requests (also called services) to a common shared network infrastructure and manage network resources to meet diverse quality of service (QoS) requirements. We propose a mixed-integer nonlinear programming (MINLP) formulation for the considered NS problem that can flexibly route the traffic flow of the services on multiple paths and provide end-to-end delay and reliability guarantees for all services. To overcome the computational difficulty due to the intrinsic nonlinearity in the MINLP formulation, we transform the MINLP formulation into an equivalent mixed-integer linear programming (MILP) formulation and further show that their continuous relaxations are equivalent. In sharp contrast to the continuous relaxation of the MINLP formulation which is a nonconvex nonlinear programming problem, the continuous relaxation of the MILP formulation is a polynomial-time solvable linear programming problem, which significantly facilitates the algorithmic design. 
Based on the newly proposed MILP formulation, we develop a customized column generation (cCG) algorithm for solving the NS problem. The proposed cCG algorithm is a decomposition-based algorithm and is particularly suitable for solving large-scale NS problems. Numerical results demonstrate the efficacy of the proposed formulations and the proposed cCG algorithm. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13943v1-abstract-full').style.display = 'none'; document.getElementById('2409.13943v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 10 figs, submitted for possible publication. 
arXiv admin note: text overlap with arXiv:2110.03915</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.12600">arXiv:2409.12600</a> <span> [<a href="https://arxiv.org/pdf/2409.12600">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> A Systematic Post-Processing Approach for Quantitative $T_{1ρ}$ Imaging of Knee Articular Cartilage </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhong%2C+J">Junru Zhong</a>, <a href="/search/eess?searchtype=author&query=Yao%2C+Y">Yongcheng Yao</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+F">Fan Xiao</a>, <a href="/search/eess?searchtype=author&query=Ong%2C+T+M">Tim-Yun Michael Ong</a>, <a href="/search/eess?searchtype=author&query=Ho%2C+K+K">Ki-Wai Kevin Ho</a>, <a href="/search/eess?searchtype=author&query=Li%2C+S">Siyue Li</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+C">Chaoxing Huang</a>, <a href="/search/eess?searchtype=author&query=Chan%2C+Q">Queenie Chan</a>, <a href="/search/eess?searchtype=author&query=Griffith%2C+J+F">James F. Griffith</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Weitian Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.12600v1-abstract-short" style="display: inline;"> Objective: To establish an automated pipeline for post-processing of quantitative spin-lattice relaxation time constant in the rotating frame ($T_{1ρ}$) imaging of knee articular cartilage.
Design: The proposed post-processing pipeline commences with an image standardisation procedure, followed by deep learning-based segmentation to generate cartilage masks. The articular cartilage is then automat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.12600v1-abstract-full').style.display = 'inline'; document.getElementById('2409.12600v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.12600v1-abstract-full" style="display: none;"> Objective: To establish an automated pipeline for post-processing of quantitative spin-lattice relaxation time constant in the rotating frame ($T_{1ρ}$) imaging of knee articular cartilage. Design: The proposed post-processing pipeline commences with an image standardisation procedure, followed by deep learning-based segmentation to generate cartilage masks. The articular cartilage is then automatically parcellated into 20 subregions, where $T_{1ρ}$ quantification is performed. The proposed pipeline was retrospectively validated on a dataset comprising knee $T_{1ρ}$ images of 10 healthy volunteers and 30 patients with knee osteoarthritis. Three experiments were conducted, namely an assessment of segmentation model performance (using Dice similarity coefficients, DSCs); an evaluation of the impact of standardisation; and a test of $T_{1ρ}$ quantification accuracy (using paired t-tests; root-mean-square deviations, RMSDs; and coefficients of variance of RMSDs, $CV_{RMSD}$). Statistical significance was set as p&lt;0.05. Results: There was a substantial agreement between the subregional $T_{1ρ}$ quantification from the model-predicted masks and those from the manual segmentation labels. In patients, 17 of 20 subregions, and in healthy volunteers, 18 out of 20 subregions, demonstrated no significant difference between predicted and reference $T_{1ρ}$ quantifications.
Average RMSDs were 0.79 ms for patients and 0.56 ms for healthy volunteers, while average $CV_{RMSD}$ were 1.97% and 1.38% for patients and healthy volunteers. Bland-Altman plots showed negligible bias across all subregions for patients and healthy volunteers. Conclusion: The proposed pipeline can perform automatic and reliable post-processing of quantitative $T_{1ρ}$ images of knee articular cartilage. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.12600v1-abstract-full').style.display = 'none'; document.getElementById('2409.12600v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Junru Zhong and Yongcheng Yao share the same contribution.
Work was partially done when Yongcheng Yao and Siyue Li was with CUHK</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.09506">arXiv:2409.09506</a> <span> [<a href="https://arxiv.org/pdf/2409.09506">pdf</a>, <a href="https://arxiv.org/format/2409.09506">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> ESPnet-EZ: Python-only ESPnet for Easy Fine-tuning and Integration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Someki%2C+M">Masao Someki</a>, <a href="/search/eess?searchtype=author&query=Choi%2C+K">Kwanghee Choi</a>, <a href="/search/eess?searchtype=author&query=Arora%2C+S">Siddhant Arora</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">William Chen</a>, <a href="/search/eess?searchtype=author&query=Cornell%2C+S">Samuele Cornell</a>, <a href="/search/eess?searchtype=author&query=Han%2C+J">Jionghao Han</a>, <a href="/search/eess?searchtype=author&query=Peng%2C+Y">Yifan Peng</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+J">Jiatong Shi</a>, <a href="/search/eess?searchtype=author&query=Srivastav%2C+V">Vaibhav Srivastav</a>, <a href="/search/eess?searchtype=author&query=Watanabe%2C+S">Shinji Watanabe</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.09506v1-abstract-short" style="display: inline;"> We introduce ESPnet-EZ, an extension of the open-source speech processing toolkit ESPnet, aimed 
at quick and easy development of speech models. ESPnet-EZ focuses on two major aspects: (i) easy fine-tuning and inference of existing ESPnet models on various tasks and (ii) easy integration with popular deep neural network frameworks such as PyTorch-Lightning, Hugging Face transformers and datasets, a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.09506v1-abstract-full').style.display = 'inline'; document.getElementById('2409.09506v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.09506v1-abstract-full" style="display: none;"> We introduce ESPnet-EZ, an extension of the open-source speech processing toolkit ESPnet, aimed at quick and easy development of speech models. ESPnet-EZ focuses on two major aspects: (i) easy fine-tuning and inference of existing ESPnet models on various tasks and (ii) easy integration with popular deep neural network frameworks such as PyTorch-Lightning, Hugging Face transformers and datasets, and Lhotse. By replacing ESPnet design choices inherited from Kaldi with a Python-only, Bash-free interface, we dramatically reduce the effort required to build, debug, and use a new model. For example, to fine-tune a speech foundation model, ESPnet-EZ, compared to ESPnet, reduces the number of newly written code by 2.7x and the amount of dependent code by 6.7x while dramatically reducing the Bash script dependencies. The codebase of ESPnet-EZ is publicly available. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.09506v1-abstract-full').style.display = 'none'; document.getElementById('2409.09506v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to SLT 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.07361">arXiv:2409.07361</a> <span> [<a href="https://arxiv.org/pdf/2409.07361">pdf</a>, <a href="https://arxiv.org/format/2409.07361">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Quantifying Knee Cartilage Shape and Lesion: From Image to Metrics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Yao%2C+Y">Yongcheng Yao</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Weitian Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.07361v1-abstract-short" style="display: inline;"> Imaging features of knee articular cartilage have been shown to be potential imaging biomarkers for knee osteoarthritis. Despite recent methodological advancements in image analysis techniques like image segmentation, registration, and domain-specific image computing algorithms, only a few works focus on building fully automated pipelines for imaging feature extraction. 
In this study, we developed… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.07361v1-abstract-full').style.display = 'inline'; document.getElementById('2409.07361v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.07361v1-abstract-full" style="display: none;"> Imaging features of knee articular cartilage have been shown to be potential imaging biomarkers for knee osteoarthritis. Despite recent methodological advancements in image analysis techniques like image segmentation, registration, and domain-specific image computing algorithms, only a few works focus on building fully automated pipelines for imaging feature extraction. In this study, we developed a deep-learning-based medical image analysis application for knee cartilage morphometrics, CartiMorph Toolbox (CMT). We proposed a 2-stage joint template learning and registration network, CMT-reg. We trained the model using the OAI-ZIB dataset and assessed its performance in template-to-image registration. The CMT-reg demonstrated competitive results compared to other state-of-the-art models. We integrated the proposed model into an automated pipeline for the quantification of cartilage shape and lesion (full-thickness cartilage loss, specifically). The toolbox provides a comprehensive, user-friendly solution for medical image analysis and data visualization. The software and models are available at https://github.com/YongchengYAO/CMT-AMAI24paper . <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.07361v1-abstract-full').style.display = 'none'; document.getElementById('2409.07361v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The paper will be in the conference proceedings of AMAI 2024. See the conference website: https://sites.google.com/view/amai2024/home</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.06237">arXiv:2409.06237</a> <span> [<a href="https://arxiv.org/pdf/2409.06237">pdf</a>, <a href="https://arxiv.org/format/2409.06237">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> RobustSVC: HuBERT-based Melody Extractor and Adversarial Learning for Robust Singing Voice Conversion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wei Chen</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+X">Xintao Zhao</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+J">Jun Chen</a>, <a href="/search/eess?searchtype=author&query=Sha%2C+B">Binzhu Sha</a>, <a href="/search/eess?searchtype=author&query=Lin%2C+Z">Zhiwei Lin</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Z">Zhiyong Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.06237v1-abstract-short" style="display: inline;"> Singing voice conversion (SVC) is hindered by noise sensitivity due to the use of non-robust methods for extracting pitch and energy during the inference. 
As clean signals are key for the source audio in SVC, music source separation preprocessing offers a viable solution for handling noisy audio, like singing with background music (BGM). However, current separating methods struggle to fully remove… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.06237v1-abstract-full').style.display = 'inline'; document.getElementById('2409.06237v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.06237v1-abstract-full" style="display: none;"> Singing voice conversion (SVC) is hindered by noise sensitivity due to the use of non-robust methods for extracting pitch and energy during the inference. As clean signals are key for the source audio in SVC, music source separation preprocessing offers a viable solution for handling noisy audio, like singing with background music (BGM). However, current separating methods struggle to fully remove noise or excessively suppress signal components, affecting the naturalness and similarity of the processed audio. To tackle this, our study introduces RobustSVC, a novel any-to-one SVC framework that converts noisy vocals into clean vocals sung by the target singer. We replace the non-robust feature with a HuBERT-based melody extractor and use adversarial training mechanisms with three discriminators to reduce information leakage in self-supervised representations. Experimental results show that RobustSVC is noise-robust and achieves higher similarity and naturalness than baseline methods in both noisy and clean vocal conditions. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.06237v1-abstract-full').style.display = 'none'; document.getElementById('2409.06237v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ISCSLP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.03597">arXiv:2409.03597</a> <span> [<a href="https://arxiv.org/pdf/2409.03597">pdf</a>, <a href="https://arxiv.org/format/2409.03597">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Multimodal Laryngoscopic Video Analysis for Assisted Diagnosis of Vocal Fold Paralysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhang%2C+Y">Yucong Zhang</a>, <a href="/search/eess?searchtype=author&query=Zou%2C+X">Xin Zou</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+J">Jinshan Yang</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wenjun Chen</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+J">Juan Liu</a>, <a href="/search/eess?searchtype=author&query=Liang%2C+F">Faya Liang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+M">Ming Li</a> 
</p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.03597v2-abstract-short" style="display: inline;"> This paper presents the Multimodal Laryngoscopic Video Analyzing System (MLVAS), a novel system that leverages both audio and video data to automatically extract key segments and metrics from raw laryngeal videostroboscopic videos for assisted clinical assessment. The system integrates video-based glottis detection with an audio keyword spotting method to analyze both video and audio data, identif… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.03597v2-abstract-full').style.display = 'inline'; document.getElementById('2409.03597v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.03597v2-abstract-full" style="display: none;"> This paper presents the Multimodal Laryngoscopic Video Analyzing System (MLVAS), a novel system that leverages both audio and video data to automatically extract key segments and metrics from raw laryngeal videostroboscopic videos for assisted clinical assessment. The system integrates video-based glottis detection with an audio keyword spotting method to analyze both video and audio data, identifying patient vocalizations and refining video highlights to ensure optimal inspection of vocal fold movements. Additionally, MLVAS features an advanced strobing video extraction module that specifically identifies strobing frames from laryngeal videostroboscopy by analyzing hue, saturation, and value fluctuations. Beyond key segment extraction, MLVAS provides effective metrics for Vocal Fold Paralysis (VFP) detection. 
It employs a novel two-stage glottis segmentation process using a U-Net for initial segmentation, followed by a diffusion-based refinement to reduce false positives, providing better segmentation masks for downstream tasks. MLVAS estimates the vibration dynamics for both left and right vocal folds from the segmented glottis masks to detect unilateral VFP by measuring the angle deviation with the estimated glottal midline. Comparing the variance between left's and right's dynamics, the system effectively distinguishes between left and right VFP. We conducted several ablation studies to demonstrate the effectiveness of each module in the proposed MLVAS. The experimental results on a public segmentation dataset show the effectiveness of our proposed segmentation module. In addition, VFP classification results on a real-world clinic dataset demonstrate MLVAS's ability of providing reliable and objective metrics as well as visualization for assisted clinical diagnosis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.03597v2-abstract-full').style.display = 'none'; document.getElementById('2409.03597v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to JBHI</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.14340">arXiv:2408.14340</a> <span> [<a href="https://arxiv.org/pdf/2408.14340">pdf</a>, <a href="https://arxiv.org/format/2408.14340">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Foundation Models for Music: A Survey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Ma%2C+Y">Yinghao Ma</a>, <a href="/search/eess?searchtype=author&query=%C3%98land%2C+A">Anders Øland</a>, <a href="/search/eess?searchtype=author&query=Ragni%2C+A">Anton Ragni</a>, <a href="/search/eess?searchtype=author&query=Del+Sette%2C+B+M">Bleiz MacSen Del Sette</a>, <a href="/search/eess?searchtype=author&query=Saitis%2C+C">Charalampos Saitis</a>, <a href="/search/eess?searchtype=author&query=Donahue%2C+C">Chris Donahue</a>, <a href="/search/eess?searchtype=author&query=Lin%2C+C">Chenghua Lin</a>, <a href="/search/eess?searchtype=author&query=Plachouras%2C+C">Christos Plachouras</a>, <a href="/search/eess?searchtype=author&query=Benetos%2C+E">Emmanouil Benetos</a>, <a href="/search/eess?searchtype=author&query=Shatri%2C+E">Elona Shatri</a>, <a 
href="/search/eess?searchtype=author&query=Morreale%2C+F">Fabio Morreale</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+G">Ge Zhang</a>, <a href="/search/eess?searchtype=author&query=Fazekas%2C+G">György Fazekas</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+G">Gus Xia</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+H">Huan Zhang</a>, <a href="/search/eess?searchtype=author&query=Manco%2C+I">Ilaria Manco</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+J">Jiawen Huang</a>, <a href="/search/eess?searchtype=author&query=Guinot%2C+J">Julien Guinot</a>, <a href="/search/eess?searchtype=author&query=Lin%2C+L">Liwei Lin</a>, <a href="/search/eess?searchtype=author&query=Marinelli%2C+L">Luca Marinelli</a>, <a href="/search/eess?searchtype=author&query=Lam%2C+M+W+Y">Max W. Y. Lam</a>, <a href="/search/eess?searchtype=author&query=Sharma%2C+M">Megha Sharma</a>, <a href="/search/eess?searchtype=author&query=Kong%2C+Q">Qiuqiang Kong</a>, <a href="/search/eess?searchtype=author&query=Dannenberg%2C+R+B">Roger B. Dannenberg</a>, <a href="/search/eess?searchtype=author&query=Yuan%2C+R">Ruibin Yuan</a> , et al. (17 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.14340v3-abstract-short" style="display: inline;"> In recent years, foundation models (FMs) such as large language models (LLMs) and latent diffusion models (LDMs) have profoundly impacted diverse sectors, including music. This comprehensive review examines state-of-the-art (SOTA) pre-trained models and foundation models in music, spanning from representation learning, generative learning and multimodal learning. 
We first contextualise the signifi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.14340v3-abstract-full').style.display = 'inline'; document.getElementById('2408.14340v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.14340v3-abstract-full" style="display: none;"> In recent years, foundation models (FMs) such as large language models (LLMs) and latent diffusion models (LDMs) have profoundly impacted diverse sectors, including music. This comprehensive review examines state-of-the-art (SOTA) pre-trained models and foundation models in music, spanning from representation learning, generative learning and multimodal learning. We first contextualise the significance of music in various industries and trace the evolution of AI in music. By delineating the modalities targeted by foundation models, we discover many of the music representations are underexplored in FM development. Then, emphasis is placed on the lack of versatility of previous methods on diverse music applications, along with the potential of FMs in music understanding, generation and medical application. By comprehensively exploring the details of the model pre-training paradigm, architectural choices, tokenisation, finetuning methodologies and controllability, we emphasise the important topics that should have been well explored, like instruction tuning and in-context learning, scaling law and emergent ability, as well as long-sequence modelling etc. A dedicated section presents insights into music agents, accompanied by a thorough analysis of datasets and evaluations essential for pre-training and downstream tasks. Finally, by underscoring the vital importance of ethical considerations, we advocate that following research on FM for music should focus more on such issues as interpretability, transparency, human responsibility, and copyright issues. 
The paper offers insights into future challenges and trends on FMs for music, aiming to shape the trajectory of human-AI collaboration in the music realm. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.14340v3-abstract-full').style.display = 'none'; document.getElementById('2408.14340v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.13978">arXiv:2408.13978</a> <span> [<a href="https://arxiv.org/pdf/2408.13978">pdf</a>, <a href="https://arxiv.org/format/2408.13978">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Histology Virtual Staining with Mask-Guided Adversarial Transfer Learning for Tertiary Lymphoid Structure Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+Q">Qiuli Wang</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Y">Yongxu Liu</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+L">Li Ma</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+X">Xianqi Wang</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wei Chen</a>, <a href="/search/eess?searchtype=author&query=Yao%2C+X">Xiaohong Yao</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.13978v1-abstract-short" style="display: inline;"> Histological Tertiary Lymphoid Structures (TLSs) are increasingly recognized for their correlation with the efficacy of immunotherapy in various solid tumors. Traditionally, the identification and characterization of TLSs rely on immunohistochemistry (IHC) staining techniques, utilizing markers such as CD20 for B cells. Despite the specificity of IHC, Hematoxylin-Eosin (H&E) staining offers a more… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.13978v1-abstract-full').style.display = 'inline'; document.getElementById('2408.13978v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.13978v1-abstract-full" style="display: none;"> Histological Tertiary Lymphoid Structures (TLSs) are increasingly recognized for their correlation with the efficacy of immunotherapy in various solid tumors. Traditionally, the identification and characterization of TLSs rely on immunohistochemistry (IHC) staining techniques, utilizing markers such as CD20 for B cells. Despite the specificity of IHC, Hematoxylin-Eosin (H&E) staining offers a more accessible and cost-effective choice. Capitalizing on the prevalence of H&E staining slides, we introduce a novel Mask-Guided Adversarial Transfer Learning method designed for virtual pathological staining. This method adeptly captures the nuanced color variations across diverse tissue types under various staining conditions, such as nucleus, red blood cells, positive reaction regions, without explicit label information, and adeptly synthesizes realistic IHC-like virtual staining patches, even replicating the positive reaction. 
Further, we propose the Virtual IHC Pathology Analysis Network (VIPA-Net), an integrated framework encompassing a Mask-Guided Transfer Module and an H&E-Based Virtual Staining TLS Detection Module. VIPA-Net synergistically harnesses both H&E staining slides and the synthesized virtual IHC patches to enhance the detection of TLSs within H&E Whole Slide Images (WSIs). We evaluate the network with a comprehensive dataset comprising 1019 annotated slides from The Cancer Genome Atlas (TCGA). Experimental results compellingly illustrate that the VIPA-Net substantially elevates TLS detection accuracy, effectively circumventing the need for actual CD20 staining across the public dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.13978v1-abstract-full').style.display = 'none'; document.getElementById('2408.13978v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.13975">arXiv:2408.13975</a> <span> [<a href="https://arxiv.org/pdf/2408.13975">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Medical Physics">physics.med-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Cross-sectional imaging of speed-of-sound distribution using photoacoustic reversal beacons </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yang Wang</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+D">Danni Wang</a>, <a href="/search/eess?searchtype=author&query=Zhong%2C+L">Liting Zhong</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+Y">Yi Zhou</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Q">Qing Wang</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wufan Chen</a>, <a href="/search/eess?searchtype=author&query=Qi%2C+L">Li Qi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.13975v1-abstract-short" style="display: inline;"> Photoacoustic tomography (PAT) enables non-invasive cross-sectional imaging of biological tissues, but it fails to map the spatial variation of speed-of-sound (SOS) within tissues. While SOS is intimately linked to density and elastic modulus of tissues, the imaging of SOS distribution serves as a complementary imaging modality to PAT. 
Moreover, an accurate SOS map can be leveraged to correct for… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.13975v1-abstract-full').style.display = 'inline'; document.getElementById('2408.13975v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.13975v1-abstract-full" style="display: none;"> Photoacoustic tomography (PAT) enables non-invasive cross-sectional imaging of biological tissues, but it fails to map the spatial variation of speed-of-sound (SOS) within tissues. While SOS is intimately linked to density and elastic modulus of tissues, the imaging of SOS distribution serves as a complementary imaging modality to PAT. Moreover, an accurate SOS map can be leveraged to correct for PAT image degradation arising from acoustic heterogeneities. Herein, we propose a novel approach for SOS reconstruction using only PAT imaging modality. Our method is based on photoacoustic reversal beacons (PRBs), which are small light-absorbing targets with strong photoacoustic contrast. We excite and scan a number of PRBs positioned at the periphery of the target, and the generated photoacoustic waves propagate through the target from various directions, thereby achieve spatial sampling of the internal SOS. We formulate a linear inverse model for pixel-wise SOS reconstruction and solve it with iterative optimization technique. We validate the feasibility of the proposed method through simulations, phantoms, and ex vivo biological tissue tests. Experimental results demonstrate that our approach can achieve accurate reconstruction of SOS distribution. Leveraging the obtained SOS map, we further demonstrate significantly enhanced PAT image reconstruction with acoustic correction. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.13975v1-abstract-full').style.display = 'none'; document.getElementById('2408.13975v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.13483">arXiv:2408.13483</a> <span> [<a href="https://arxiv.org/pdf/2408.13483">pdf</a>, <a href="https://arxiv.org/format/2408.13483">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Transmissive RIS Enabled Transceiver Systems:Architecture, Design Issues and Opportunities </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+Z">Zhendong Li</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wen Chen</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Q">Qingqing Wu</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Z">Ziwei Liu</a>, <a href="/search/eess?searchtype=author&query=He%2C+C">Chong He</a>, <a href="/search/eess?searchtype=author&query=Bai%2C+X">Xudong Bai</a>, <a href="/search/eess?searchtype=author&query=Li%2C+J">Jun Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.13483v1-abstract-short" style="display: inline;"> Reconfigurable intelligent surface (RIS) is anticipated to augment the performance 
of beyond fifth-generation (B5G) and sixth-generation (6G) networks by intelligently manipulating the state of its components. Rather than employing reflective RIS for aided communications, this paper proposes an innovative transmissive RIS-enabled transceiver (TRTC) architecture that can accomplish the functions of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.13483v1-abstract-full').style.display = 'inline'; document.getElementById('2408.13483v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.13483v1-abstract-full" style="display: none;"> Reconfigurable intelligent surface (RIS) is anticipated to augment the performance of beyond fifth-generation (B5G) and sixth-generation (6G) networks by intelligently manipulating the state of its components. Rather than employing reflective RIS for aided communications, this paper proposes an innovative transmissive RIS-enabled transceiver (TRTC) architecture that can accomplish the functions of traditional multi-antenna systems in a cost-effective and energy-efficient manner. First, the proposed network architecture and its corresponding transmission scheme are elaborated from the perspectives of downlink (DL) and uplink (UL) transmissions. Then, we illustrate several significant advantages and differences of TRTC compared to other multiantenna systems. Furthermore, the downlink modulation and extraction principle based on time-modulation array (TMA) is introduced in detail to tackle the multi-stream communications. Moreover, a near-far field channel model appropriate for this architecture is proposed. Based on the channel model, we summarize some state-of-the-art channel estimation schemes, and the channel estimation scheme of TRTC is also provided. 
Considering the optimization for DL and UL communications, we present numerical simulations that confirm the superiority of the proposed optimization algorithm. Lastly, numerous prospective research avenues for TRTC systems are delineated to inspire further exploration. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.13483v1-abstract-full').style.display = 'none'; document.getElementById('2408.13483v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE VTM, 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.12069">arXiv:2408.12069</a> <span> [<a href="https://arxiv.org/pdf/2408.12069">pdf</a>, <a href="https://arxiv.org/format/2408.12069">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Rotatable Block-Controlled RIS: Bridging the Performance Gap to Element-Controlled Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+W">Weicong Chen</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+X">Xinyi Yang</a>, <a href="/search/eess?searchtype=author&query=Wen%2C+C">Chao-Kai Wen</a>, <a href="/search/eess?searchtype=author&query=Tang%2C+W">Wankai Tang</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+J">Jinghe Wang</a>, <a 
href="/search/eess?searchtype=author&query=Yuan%2C+Y">Yifei Yuan</a>, <a href="/search/eess?searchtype=author&query=Li%2C+X">Xiao Li</a>, <a href="/search/eess?searchtype=author&query=Jin%2C+S">Shi Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.12069v1-abstract-short" style="display: inline;"> The passive reconfigurable intelligent surface (RIS) requires numerous elements to achieve adequate array gain, which linearly increases power consumption (PC) with the number of reflection phases. To address this, this letter introduces a rotatable block-controlled RIS (BC-RIS) that preserves spectral efficiency (SE) while reducing power costs. Unlike the element-controlled RIS (EC-RIS), which ne… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.12069v1-abstract-full').style.display = 'inline'; document.getElementById('2408.12069v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.12069v1-abstract-full" style="display: none;"> The passive reconfigurable intelligent surface (RIS) requires numerous elements to achieve adequate array gain, which linearly increases power consumption (PC) with the number of reflection phases. To address this, this letter introduces a rotatable block-controlled RIS (BC-RIS) that preserves spectral efficiency (SE) while reducing power costs. Unlike the element-controlled RIS (EC-RIS), which necessitates independent phase control for each element, the BC-RIS uses a single phase control circuit for each block, substantially lowering power requirements. In the maximum ratio transmission, by customizing specular reflection channels through the rotation of blocks and coherently superimposing signals with optimized reflection phase of blocks, the BC-RIS achieves the same averaged SE as the EC-RIS. 
To counteract the added power demands from rotation, influenced by block size, we have developed a segmentation scheme to minimize overall PC. Furthermore, constraints for rotation power-related parameters have been established to enhance the energy efficiency of the BC-RIS compared to the EC-RIS. Numerical results confirm that this approach significantly improves energy efficiency while maintaining performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.12069v1-abstract-full').style.display = 'none'; document.getElementById('2408.12069v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This work has been submitted to the IEEE for possible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.07665">arXiv:2408.07665</a> <span> [<a href="https://arxiv.org/pdf/2408.07665">pdf</a>, <a href="https://arxiv.org/ps/2408.07665">ps</a>, <a href="https://arxiv.org/format/2408.07665">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Spoken Stereoset: On Evaluating Social Bias Toward Speaker in Speech Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Lin%2C+Y">Yi-Cheng Lin</a>, <a 
href="/search/eess?searchtype=author&query=Chen%2C+W">Wei-Chih Chen</a>, <a href="/search/eess?searchtype=author&query=Lee%2C+H">Hung-yi Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.07665v1-abstract-short" style="display: inline;"> Warning: This paper may contain texts with uncomfortable content. Large Language Models (LLMs) have achieved remarkable performance in various tasks, including those involving multimodal data like speech. However, these models often exhibit biases due to the nature of their training data. Recently, more Speech Large Language Models (SLLMs) have emerged, underscoring the urgent need to address th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.07665v1-abstract-full').style.display = 'inline'; document.getElementById('2408.07665v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.07665v1-abstract-full" style="display: none;"> Warning: This paper may contain texts with uncomfortable content. Large Language Models (LLMs) have achieved remarkable performance in various tasks, including those involving multimodal data like speech. However, these models often exhibit biases due to the nature of their training data. Recently, more Speech Large Language Models (SLLMs) have emerged, underscoring the urgent need to address these biases. This study introduces Spoken Stereoset, a dataset specifically designed to evaluate social biases in SLLMs. By examining how different models respond to speech from diverse demographic groups, we aim to identify these biases. Our experiments reveal significant insights into their performance and bias levels. The findings indicate that while most models show minimal bias, some still exhibit slightly stereotypical or anti-stereotypical tendencies. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.07665v1-abstract-full').style.display = 'none'; document.getElementById('2408.07665v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.02025">arXiv:2408.02025</a> <span> [<a href="https://arxiv.org/pdf/2408.02025">pdf</a>, <a href="https://arxiv.org/format/2408.02025">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Contrastive Learning-based Chaining-Cluster for Multilingual Voice-Face Association </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wuyang Chen</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+Y">Yanjie Sun</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+K">Kele Xu</a>, <a href="/search/eess?searchtype=author&query=Dou%2C+Y">Yong Dou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.02025v2-abstract-short" style="display: inline;"> The innate correlation between a person's face and voice has recently emerged as a compelling area of study, especially within the context of multilingual environments. 
This paper introduces our novel solution to the Face-Voice Association in Multilingual Environments (FAME) 2024 challenge, focusing on a contrastive learning-based chaining-cluster method to enhance face-voice association. This tas… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.02025v2-abstract-full').style.display = 'inline'; document.getElementById('2408.02025v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.02025v2-abstract-full" style="display: none;"> The innate correlation between a person's face and voice has recently emerged as a compelling area of study, especially within the context of multilingual environments. This paper introduces our novel solution to the Face-Voice Association in Multilingual Environments (FAME) 2024 challenge, focusing on a contrastive learning-based chaining-cluster method to enhance face-voice association. This task involves the challenges of building biometric relations between auditory and visual modality cues and modelling the prosody interdependence between different languages while addressing both intrinsic and extrinsic variability present in the data. To handle these non-trivial challenges, our method employs supervised cross-contrastive (SCC) learning to establish robust associations between voices and faces in multi-language scenarios. Following this, we have specifically designed a chaining-cluster-based post-processing step to mitigate the impact of outliers often found in unconstrained in the wild data. We conducted extensive experiments to investigate the impact of language on face-voice association. The overall results were evaluated on the FAME public evaluation platform, where we achieved 2nd place. The results demonstrate the superior performance of our method, and we validate the robustness and effectiveness of our proposed approach. 
Code is available at https://github.com/colaudiolab/FAME24_solution. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.02025v2-abstract-full').style.display = 'none'; document.getElementById('2408.02025v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.19511">arXiv:2407.19511</a> <span> [<a href="https://arxiv.org/pdf/2407.19511">pdf</a>, <a href="https://arxiv.org/ps/2407.19511">ps</a>, <a href="https://arxiv.org/format/2407.19511">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Suppressing Beam Squint Effect For Near-Field Wideband Communication Through Movable Antennas </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhu%2C+Y">Yanze Zhu</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Q">Qingqing Wu</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Y">Yang Liu</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+Q">Qingjiang Shi</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wen Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.19511v1-abstract-short" style="display: inline;"> 
In this correspondence, we study deploying movable antenna (MA) array in a wideband multiple-input-single-output (MISO) communication system, where near-field (NF) channel model is considered. To alleviate beam squint effect, we propose to maximize the minimum analog beamforming gain across the entire wideband spectrum by appropriately adjusting MAs' positions, which is a highly challenging task.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.19511v1-abstract-full').style.display = 'inline'; document.getElementById('2407.19511v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.19511v1-abstract-full" style="display: none;"> In this correspondence, we study deploying movable antenna (MA) array in a wideband multiple-input-single-output (MISO) communication system, where near-field (NF) channel model is considered. To alleviate beam squint effect, we propose to maximize the minimum analog beamforming gain across the entire wideband spectrum by appropriately adjusting MAs' positions, which is a highly challenging task. By introducing a slack variable and adopting the cutting-the-edge smoothed-gradient-descent-ascent (SGDA) method, we develop algorithms to resolve the aforementioned challenge. Numerical results verify the effectiveness of our proposed algorithms and demonstrate the benefit of utilizing MA array to mitigate beam squint effect in NF wideband system. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.19511v1-abstract-full').style.display = 'none'; document.getElementById('2407.19511v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 4 figures, submitted to IEEE journal</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.18986">arXiv:2407.18986</a> <span> [<a href="https://arxiv.org/pdf/2407.18986">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> TERIME: An improved RIME algorithm with enhanced exploration and exploitation for robust parameter extraction of photovoltaic models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+S">Shi-Shun Chen</a>, <a href="/search/eess?searchtype=author&query=Jiang%2C+Y">Yu-Tong Jiang</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wen-Bin Chen</a>, <a href="/search/eess?searchtype=author&query=Li%2C+X">Xiao-Yang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.18986v2-abstract-short" style="display: inline;"> Parameter extraction of photovoltaic (PV) models is crucial for the planning, optimization, and control of PV systems. Although some methods using meta-heuristic algorithms have been proposed to determine these parameters, the robustness of solutions obtained by these methods faces great challenges when the complexity of the PV model increases. 
The unstable results will affect the reliable operati… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.18986v2-abstract-full').style.display = 'inline'; document.getElementById('2407.18986v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.18986v2-abstract-full" style="display: none;"> Parameter extraction of photovoltaic (PV) models is crucial for the planning, optimization, and control of PV systems. Although some methods using meta-heuristic algorithms have been proposed to determine these parameters, the robustness of solutions obtained by these methods faces great challenges when the complexity of the PV model increases. The unstable results will affect the reliable operation and maintenance strategies of PV systems. In response to this challenge, an improved RIME algorithm with enhanced exploration and exploitation is proposed for robust and accurate parameter identification for various PV models. Specifically, the differential evolution mutation operator is integrated in the exploration phase to enhance the population diversity. Meanwhile, a new exploitation strategy incorporating randomization and neighborhood strategies simultaneously is developed to maintain the balance of exploitation width and depth. The improved RIME algorithm is applied to estimate the optimal parameters of the single-diode model (SDM), double-diode model (DDM), and triple-diode model (TDM) combined with the Lambert-W function for three PV cell and module types including RTC France, Photo Watt-PWP 201 and S75. According to the statistical analysis in 100 runs, the TEIMRE achieves more accurate and robust parameter estimations than other techniques to various PV models in varying environmental conditions. All of our source codes are publicly available at https://github.com/dirge1/TERIME. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.18986v2-abstract-full').style.display = 'none'; document.getElementById('2407.18986v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.15139">arXiv:2407.15139</a> <span> [<a href="https://arxiv.org/pdf/2407.15139">pdf</a>, <a href="https://arxiv.org/ps/2407.15139">ps</a>, <a href="https://arxiv.org/format/2407.15139">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> An Interface Method for Co-simulation of EMT Model and Shifted Frequency EMT Model Based on Rotational Invariance Techniques </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Gao%2C+S">Shilin Gao</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Ying Chen</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+Z">Zhitong Yu</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wensheng Chen</a>, <a href="/search/eess?searchtype=author&query=Song%2C+Y">Yankan Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.15139v2-abstract-short" style="display: inline;"> The shifted frequency-based electromagnetic transient (SFEMT) simulation has greatly improved the computational efficiency of traditional 
electromagnetic transient (EMT) simulation for the ac grid. This letter proposes a novel interface for the co-simulation of the SFEMT model and the traditional EMT model. The general form of SFEMT modeling and the principle of analytical signal construction are… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.15139v2-abstract-full').style.display = 'inline'; document.getElementById('2407.15139v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.15139v2-abstract-full" style="display: none;"> The shifted frequency-based electromagnetic transient (SFEMT) simulation has greatly improved the computational efficiency of traditional electromagnetic transient (EMT) simulation for the ac grid. This letter proposes a novel interface for the co-simulation of the SFEMT model and the traditional EMT model. The general form of SFEMT modeling and the principle of analytical signal construction are first derived. Then, an interface for the co-simulation of EMT and SFEMT simulation is proposed based on rotational invariance techniques. Theoretical analyses and test results demonstrate the effectiveness of the proposed method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.15139v2-abstract-full').style.display = 'none'; document.getElementById('2407.15139v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.13123">arXiv:2407.13123</a> <span> [<a href="https://arxiv.org/pdf/2407.13123">pdf</a>, <a href="https://arxiv.org/format/2407.13123">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Reconfigurable Intelligent Surface Aided Vehicular Edge Computing: Joint Phase-shift Optimization and Multi-User Power Allocation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Qi%2C+K">Kangwei Qi</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Q">Qiong Wu</a>, <a href="/search/eess?searchtype=author&query=Fan%2C+P">Pingyi Fan</a>, <a href="/search/eess?searchtype=author&query=Cheng%2C+N">Nan Cheng</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+W">Wen Chen</a>, <a href="/search/eess?searchtype=author&query=Letaief%2C+K+B">Khaled B. Letaief</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.13123v1-abstract-short" style="display: inline;"> Vehicular edge computing (VEC) is an emerging technology with significant potential in the field of internet of vehicles (IoV), enabling vehicles to perform intensive computational tasks locally or offload them to nearby edge devices. 
However, the quality of communication links may be severely deteriorated due to obstacles such as buildings, impeding the offloading process. To address this challen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.13123v1-abstract-full').style.display = 'inline'; document.getElementById('2407.13123v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.13123v1-abstract-full" style="display: none;"> Vehicular edge computing (VEC) is an emerging technology with significant potential in the field of internet of vehicles (IoV), enabling vehicles to perform intensive computational tasks locally or offload them to nearby edge devices. However, the quality of communication links may be severely deteriorated due to obstacles such as buildings, impeding the offloading process. To address this challenge, we introduce the use of Reconfigurable Intelligent Surfaces (RIS), which provide alternative communication pathways to assist vehicular communication. By dynamically adjusting the phase-shift of the RIS, the performance of VEC systems can be substantially improved. In this work, we consider a RIS-assisted VEC system, and design an optimal scheme for local execution power, offloading power, and RIS phase-shift, where random task arrivals and channel variations are taken into account. To address the scheme, we propose an innovative deep reinforcement learning (DRL) framework that combines the Deep Deterministic Policy Gradient (DDPG) algorithm for optimizing RIS phase-shift coefficients and the Multi-Agent Deep Deterministic Policy Gradient (MADDPG) algorithm for optimizing the power allocation of vehicle user (VU). Simulation results show that our proposed scheme outperforms the traditional centralized DDPG, Twin Delayed Deep Deterministic Policy Gradient (TD3) and some typical stochastic schemes. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.13123v1-abstract-full').style.display = 'none'; document.getElementById('2407.13123v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been submitted to IEEE Journal. The source code has been released at https://github.com/qiongwu86/DDPG-RIS-MADDPG-POWER. arXiv admin note: text overlap with arXiv:2406.11318</span> </p> </li> </ol>
<!-- Pagination for this result list. The page being displayed is the first one (start=0, class "is-current"),
     so it is the only link that carries aria-current="page"; every other link is labelled as a navigation
     target ("Goto page N") and must not claim to be current. -->
<nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination">
  <!-- No previous page exists on page 1: the link is kept for layout but hidden via "is-invisible". -->
  <a href="" class="pagination-previous is-invisible">Previous </a>
  <a href="/search/?searchtype=author&query=Chen%2C+W&start=50" class="pagination-next">Next </a>
  <ul class="pagination-list">
    <li>
      <a href="/search/?searchtype=author&query=Chen%2C+W&start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a>
    </li>
    <li>
      <a href="/search/?searchtype=author&query=Chen%2C+W&start=50" class="pagination-link" aria-label="Goto page 2">2 </a>
    </li>
    <li>
      <a href="/search/?searchtype=author&query=Chen%2C+W&start=100" class="pagination-link" aria-label="Goto page 3">3 </a>
    </li>
    <li>
      <a href="/search/?searchtype=author&query=Chen%2C+W&start=150" class="pagination-link" aria-label="Goto page 4">4 </a>
    </li>
    <li>
      <a href="/search/?searchtype=author&query=Chen%2C+W&start=200" class="pagination-link" aria-label="Goto page 5">5 </a>
    </li>
    <li><span class="pagination-ellipsis">…</span></li>
  </ul>
</nav>
<div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help"
style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a 
href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div>
<div class="column sorry-app-links">
  <ul class="nav-spaced">
    <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li>
    <li>
      <!-- The three links below open in a new browsing context (target="_blank") on other origins.
           rel="noopener" is stated explicitly so the opened page never receives a window.opener
           reference back to this page, regardless of browser defaults. -->
      <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank" rel="noopener">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br>
      Get status notifications via
      <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank" rel="noopener"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a>
      or
      <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank" rel="noopener"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0
47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>