Search | arXiv e-print repository
Showing 1-50 of 260 results for author: Huang, J

Searching in archive eess. Results sorted by announcement date (newest first), 50 results per page.
1. arXiv:2411.12448 (cs.CV, eess.IV) https://arxiv.org/abs/2411.12448
Large Language Models for Lossless Image Compression: Next-Pixel Prediction in Language Space is All You Need
Authors: Kecheng Chen, Pingping Zhang, Hui Liu, Jie Liu, Yibing Liu, Jiaxin Huang, Shiqi Wang, Hong Yan, Haoliang Li
Abstract: We have recently witnessed that "Intelligence" and "Compression" are two sides of the same coin, where the large language model (LLM), with its unprecedented intelligence, is a general-purpose lossless compressor for various data modalities. This attribute particularly appeals to the lossless image compression community, given the increasing need to compress high-resolution images in the current streaming media era. Consequently, a natural question arises: can the compression performance of the LLM elevate lossless image compression to new heights? However, our findings indicate that the naive application of LLM-based lossless image compressors suffers from a considerable performance gap compared with existing state-of-the-art (SOTA) codecs on common benchmark datasets. In light of this, we are dedicated to realizing the unprecedented intelligence (compression) capacity of the LLM for lossless image compression tasks, thereby bridging the gap between theoretical and practical compression performance. Specifically, we propose P^2-LLM, a next-pixel prediction-based LLM, which integrates various elaborated insights and methodologies, e.g., pixel-level priors, the in-context ability of the LLM, and a pixel-level semantic preservation strategy, to enhance the understanding of pixel sequences for better next-pixel predictions. Extensive experiments on benchmark datasets demonstrate that P^2-LLM can beat SOTA classical and learned codecs.
Submitted 21 November, 2024; v1 submitted 19 November, 2024; originally announced November 2024.
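For orientation, here is a minimal, self-contained sketch of what "a predictive model is a lossless compressor" means operationally: an autoregressive model assigns P(next pixel | context), and an entropy coder can store each pixel in about -log2 P bits. The toy order-0 adaptive model below is my own stand-in, not the paper's P^2-LLM.

```python
import math

def toy_model(context):
    """Hypothetical stand-in for an LLM: a smoothed empirical distribution
    over 256 pixel values built from the pixels seen so far (order-0 model)."""
    counts = [1] * 256                       # Laplace smoothing
    for px in context:
        counts[px] += 1
    total = sum(counts)
    return [c / total for c in counts]

def ideal_code_length_bits(pixels):
    """Sum of -log2 P(x_t | x_<t): the bit cost an ideal entropy coder achieves."""
    bits = 0.0
    for t in range(len(pixels)):
        probs = toy_model(pixels[:t])
        bits += -math.log2(probs[pixels[t]])
    return bits

pixels = [12, 12, 13, 12, 200, 12, 12, 13]   # a tiny "image" as a pixel sequence
print(f"{ideal_code_length_bits(pixels):.1f} bits vs {8 * len(pixels)} raw bits")
```

An arithmetic coder driven by the same probabilities attains essentially this bit count, which is why better next-pixel prediction translates directly into better lossless compression.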
2. arXiv:2411.09593 (eess.IV, cs.AI, cs.CV) https://arxiv.org/abs/2411.09593
SMILE-UHURA Challenge -- Small Vessel Segmentation at Mesoscopic Scale from Ultra-High Resolution 7T Magnetic Resonance Angiograms
Authors: Soumick Chatterjee, Hendrik Mattern, Marc Dörner, Alessandro Sciarra, Florian Dubost, Hannes Schnurre, Rupali Khatun, Chun-Chih Yu, Tsung-Lin Hsieh, Yi-Shan Tsai, Yi-Zeng Fang, Yung-Ching Yang, Juinn-Dar Huang, Marshall Xu, Siyu Liu, Fernanda L. Ribeiro, Saskia Bollmann, Karthikesh Varma Chintalapati, Chethan Mysuru Radhakrishna, Sri Chandana Hudukula Ram Kumara, Raviteja Sutrave, Abdul Qayyum, Moona Mazher, Imran Razzak, Cristobal Rodero, et al. (23 additional authors not shown)
Abstract: The human brain receives nutrients and oxygen through an intricate network of blood vessels. Pathology affecting small vessels, at the mesoscopic scale, represents a critical vulnerability within the cerebral blood supply and can lead to severe conditions, such as Cerebral Small Vessel Diseases. The advent of 7 Tesla MRI systems has enabled the acquisition of higher spatial resolution images, making it possible to visualise such vessels in the brain. However, the lack of publicly available annotated datasets has impeded the development of robust, machine learning-driven segmentation algorithms. To address this, the SMILE-UHURA challenge was organised. This challenge, held in conjunction with ISBI 2023 in Cartagena de Indias, Colombia, aimed to provide a platform for researchers working on related topics. The SMILE-UHURA challenge addresses the gap in publicly available annotated datasets by providing an annotated dataset of Time-of-Flight angiography acquired with 7T MRI. This dataset was created through a combination of automated pre-segmentation and extensive manual refinement. In this manuscript, sixteen submitted methods and two baseline methods are compared both quantitatively and qualitatively on two different datasets: held-out test MRAs from the same dataset as the training data (with labels kept secret) and a separate 7T ToF MRA dataset where both input volumes and labels are kept secret. The results demonstrate that most of the submitted deep learning methods, trained on the provided training dataset, achieved reliable segmentation performance. Dice scores reached up to 0.838 ± 0.066 and 0.716 ± 0.125 on the respective datasets, with an average performance of up to 0.804 ± 0.15.
Submitted 14 November, 2024; originally announced November 2024.
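The Dice scores quoted above are the standard overlap metric for binary masks; a minimal sketch of the generic definition (not the challenge's exact evaluation pipeline):

```python
import numpy as np

def dice(pred: np.ndarray, truth: np.ndarray) -> float:
    """Dice = 2|A intersect B| / (|A| + |B|) for binary vessel masks."""
    pred, truth = pred.astype(bool), truth.astype(bool)
    inter = np.logical_and(pred, truth).sum()
    denom = pred.sum() + truth.sum()
    return 2.0 * inter / denom if denom else 1.0  # both empty: perfect agreement

pred = np.zeros((4, 4, 4), dtype=bool); pred[1:3, 1:3, 1:3] = True   # 8 voxels
truth = np.zeros((4, 4, 4), dtype=bool); truth[1:3, 1:3, :3] = True  # 12 voxels
print(f"Dice = {dice(pred, truth):.3f}")  # 2*8 / (8+12) = 0.800
```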
3. arXiv:2410.11736 (cs.IT, eess.SP) https://arxiv.org/abs/2410.11736
Near-Field Communications for Extremely Large-Scale MIMO: A Beamspace Perspective
Authors: Kangjian Chen, Chenhao Qi, Jingjia Huang, Octavia A. Dobre, Geoffrey Ye Li
Abstract: Extremely large-scale multiple-input multiple-output (XL-MIMO) is regarded as one of the key techniques to enhance the performance of future wireless communications. Different from regular MIMO, the XL-MIMO shifts part of the communication region from the far field to the near field, where the spherical-wave channel model cannot be accurately approximated by the commonly-adopted planar-wave channel model. As a result, the well-explored far-field beamspace is unsuitable for near-field communications, thereby requiring the exploration of a specialized near-field beamspace. In this article, we investigate near-field communications for XL-MIMO from the perspective of beamspace. Given the spherical wavefront characteristics of the near-field channels, we first map the antenna space to the near-field beamspace with the fractional Fourier transform. Then, we divide the near-field beamspace into three parts, including high mainlobe, low mainlobe, and sidelobe, and provide a comprehensive analysis of these components. Based on the analysis, we demonstrate the advantages of the near-field beamspace over existing methods. Finally, we point out several applications of the near-field beamspace and highlight some potential directions for future study.
Submitted 15 October, 2024; originally announced October 2024.
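A small numerical sketch of the far-field/near-field mismatch this article starts from: for a large aperture, the exact spherical-wave steering vector decorrelates from the planar-wave approximation at short range. Array geometry and parameters below are illustrative choices of mine, not the paper's fractional Fourier transform construction.

```python
import numpy as np

wavelength = 0.01                       # 30 GHz carrier, metres
N = 256                                 # XL-MIMO: many antennas, large aperture
pos = (np.arange(N) - (N - 1) / 2) * (wavelength / 2)  # ULA element coordinates

def spherical_steering(r, theta):
    """Exact per-element phase from true element-to-user distances (near field)."""
    dist = np.sqrt(r**2 + pos**2 - 2 * r * pos * np.sin(theta))
    return np.exp(-1j * 2 * np.pi * dist / wavelength) / np.sqrt(N)

def planar_steering(theta):
    """Far-field approximation: phase linear in the element position."""
    return np.exp(1j * 2 * np.pi * pos * np.sin(theta) / wavelength) / np.sqrt(N)

for r in (5.0, 1000.0):                 # user range in metres
    corr = abs(np.vdot(planar_steering(0.3), spherical_steering(r, 0.3)))
    print(f"range {r:6.1f} m: |<planar, spherical>| = {corr:.3f}")
    # close to 1 only at long range; the mismatch at short range is why a
    # dedicated near-field beamspace is needed
```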
4. arXiv:2410.06998 (eess.SY) https://arxiv.org/abs/2410.06998
An Improved ESO-Based Line-of-Sight Guidance Law for Path Following of Underactuated Autonomous Underwater Helicopter With Nonlinear Tracking Differentiator and Anti-saturation Controller
Authors: Haoda Li, Zichen Liu, Jin Huang, Xinyu An, Ying Chen
Abstract: This paper presents an Improved Extended-State-Observer-based Line-of-Sight (IELOS) guidance law for path following of an underactuated autonomous underwater helicopter (AUH), utilizing a nonlinear tracking differentiator and an anti-saturation controller. Due to the high mobility of the AUH, the classical reduced-order Extended State Observer (ESO) struggles to accurately track the sideslip angle, especially when rapid variation occurs. By incorporating the nonlinear tracking differentiator and anti-saturation controller, the IELOS guidance law can precisely track the sideslip angle and mitigate propeller thrust buffet compared to the classical Extended-State-Observer-based Line-of-Sight (ELOS) guidance law. The performance of the ESO is significantly influenced by its bandwidth, with the Improved Extended State Observer (IESO) proving effective at low bandwidths where the classical ESO falls short. The paper establishes the input-to-state stability of the closed-loop system. Subsequently, simulation and pool experimental results validate the effectiveness of the IELOS guidance law, which outperforms both the Line-of-Sight (LOS) and Adaptive Line-of-Sight (ALOS) guidance laws.
Submitted 9 October, 2024; originally announced October 2024.
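For orientation, a minimal linear ESO of the textbook kind this abstract builds on: the observer carries an extended state z2 that estimates an unknown "total disturbance" (here playing the role the sideslip effect plays in the paper), with gains set by a single bandwidth parameter. This is a generic construction with assumed values, not the paper's IESO.

```python
import math

# Plant: y' = f(t) + b0*u, with f(t) unknown. ESO states: z1 -> y, z2 -> f.
h, b0, w0 = 0.001, 1.0, 50.0            # step, input gain, observer bandwidth
beta1, beta2 = 2 * w0, w0**2            # bandwidth parameterisation of the gains

y, z1, z2, u = 0.0, 0.0, 0.0, 0.0       # unforced, to watch disturbance tracking
for k in range(250):                    # simulate 0.25 s with Euler steps
    t = k * h
    f = math.sin(2 * math.pi * t)       # the unknown disturbance
    y += h * (f + b0 * u)               # plant
    e = z1 - y                          # observer correction term
    z1 += h * (z2 + b0 * u - beta1 * e)
    z2 += h * (-beta2 * e)              # extended state converges toward f
print(f"after 0.25 s: true disturbance {f:+.3f}, ESO estimate {z2:+.3f}")
```

Lowering w0 makes the estimate lag the true signal, which is the low-bandwidth regime the abstract says the classical ESO handles poorly.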
5. arXiv:2410.02764 (cs.CV, cs.LG, eess.IV) https://arxiv.org/abs/2410.02764
Flash-Splat: 3D Reflection Removal with Flash Cues and Gaussian Splats
Authors: Mingyang Xie, Haoming Cai, Sachin Shah, Yiran Xu, Brandon Y. Feng, Jia-Bin Huang, Christopher A. Metzler
Abstract: We introduce a simple yet effective approach for separating transmitted and reflected light. Our key insight is that the powerful novel view synthesis capabilities provided by modern inverse rendering methods (e.g., 3D Gaussian splatting) allow one to perform flash/no-flash reflection separation using unpaired measurements -- this relaxation dramatically simplifies image acquisition over conventional paired flash/no-flash reflection separation methods. Through extensive real-world experiments, we demonstrate that our method, Flash-Splat, accurately reconstructs both transmitted and reflected scenes in 3D. Our method outperforms existing 3D reflection separation methods, which do not leverage illumination control, by a large margin. Our project webpage is at https://flash-splat.github.io/.
Submitted 3 October, 2024; originally announced October 2024.
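The conventional paired flash/no-flash cue that this paper relaxes can be sketched in a few lines: through glass, the flash mainly brightens the transmitted scene while leaving the reflection nearly unchanged, so the paired difference isolates the transmission. The image-formation model below is a toy of my own; Flash-Splat's contribution is doing the separation from unpaired captures via Gaussian splatting.

```python
import numpy as np

rng = np.random.default_rng(0)
T = rng.uniform(0, 1, (4, 4))        # transmitted scene (behind the glass)
R = rng.uniform(0, 0.5, (4, 4))      # reflected scene (observer's side)

no_flash = T + R
flash = 1.6 * T + R                  # flash boosts transmission only (toy model)

flash_residual = flash - no_flash    # = 0.6 * T: a reflection-free cue
T_hat = flash_residual / 0.6
R_hat = no_flash - T_hat
print(np.allclose(T_hat, T), np.allclose(R_hat, R))  # True True in this toy model
```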
6. arXiv:2409.12167 (eess.IV, cs.CV) https://arxiv.org/abs/2409.12167
multiPI-TransBTS: A Multi-Path Learning Framework for Brain Tumor Image Segmentation Based on Multi-Physical Information
Authors: Hongjun Zhu, Jiaohang Huang, Kuo Chen, Xuehui Ying, Ying Qian
Abstract: Brain Tumor Segmentation (BraTS) plays a critical role in clinical diagnosis, treatment planning, and monitoring the progression of brain tumors. However, due to the variability in tumor appearance, size, and intensity across different MRI modalities, automated segmentation remains a challenging task. In this study, we propose a novel Transformer-based framework, multiPI-TransBTS, which integrates multi-physical information to enhance segmentation accuracy. The model leverages spatial information, semantic information, and multi-modal imaging data, addressing the inherent heterogeneity in brain tumor characteristics. The multiPI-TransBTS framework consists of an encoder, an Adaptive Feature Fusion (AFF) module, and a multi-source, multi-scale feature decoder. The encoder incorporates a multi-branch architecture to separately extract modality-specific features from different MRI sequences. The AFF module fuses information from multiple sources using channel-wise and element-wise attention, ensuring effective feature recalibration. The decoder combines both common and task-specific features through a Task-Specific Feature Introduction (TSFI) strategy, producing accurate segmentation outputs for Whole Tumor (WT), Tumor Core (TC), and Enhancing Tumor (ET) regions. Comprehensive evaluations on the BraTS2019 and BraTS2020 datasets demonstrate the superiority of multiPI-TransBTS over state-of-the-art methods. The model consistently achieves better Dice coefficients, Hausdorff distances, and Sensitivity scores, highlighting its effectiveness in addressing the BraTS challenges. Our results also indicate the need for further exploration of the balance between precision and recall in the ET segmentation task. The proposed framework represents a significant advancement in BraTS, with potential implications for improving clinical outcomes for brain tumor patients.
Submitted 18 September, 2024; originally announced September 2024.
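The channel-wise attention fusion mentioned for the AFF module can be sketched generically as squeeze-and-excitation-style gating of two feature sources; shapes and weights below are placeholders of mine, not the paper's module.

```python
import numpy as np

def channel_attention_fuse(a, b, w1, w2):
    """Fuse two feature maps (C, H, W) with a per-channel gate derived from
    their pooled statistics (squeeze-and-excitation style)."""
    s = (a + b).mean(axis=(1, 2))                 # squeeze: per-channel statistics
    g = np.maximum(w1 @ s, 0.0)                   # excitation MLP, ReLU
    att = 1.0 / (1.0 + np.exp(-(w2 @ g)))         # sigmoid gate per channel
    return att[:, None, None] * a + (1 - att)[:, None, None] * b

rng = np.random.default_rng(1)
C = 8
a, b = rng.standard_normal((C, 16, 16)), rng.standard_normal((C, 16, 16))
w1, w2 = rng.standard_normal((C // 2, C)), rng.standard_normal((C, C // 2))
print(channel_attention_fuse(a, b, w1, w2).shape)  # (8, 16, 16)
```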
7. arXiv:2409.07482 (eess.SP, cs.AI) https://arxiv.org/abs/2409.07482
VSLLaVA: a pipeline of large multimodal foundation model for industrial vibration signal analysis
Authors: Qi Li, Jinfeng Huang, Hongliang He, Xinran Zhang, Feibin Zhang, Zhaoye Qin, Fulei Chu
Abstract: Large multimodal foundation models have been extensively utilized for image recognition tasks guided by instructions, yet there remains a scarcity of domain expertise in industrial vibration signal analysis. This paper presents a pipeline named VSLLaVA that leverages a large language model to integrate expert knowledge for the identification of signal parameters and the diagnosis of faults. Within this pipeline, we first introduce an expert rule-assisted signal generator. The generator merges signals provided by vibration analysis experts with domain-specific parameter identification and fault diagnosis question-answer pairs to build signal-question-answer triplets. Then we use these triplets to apply low-rank adaptation methods for fine-tuning the linear layers of the Contrastive Language-Image Pretraining (CLIP) model and the large language model, injecting multimodal signal processing knowledge. Finally, the fine-tuned model is assessed jointly by the large language model and expert rules for answer accuracy and relevance, showcasing enhanced performance in identifying and analyzing various signal parameters and diagnosing faults. These enhancements indicate the potential of this pipeline to serve as a foundation for future industrial signal analysis and monitoring.
Submitted 3 September, 2024; originally announced September 2024.
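The low-rank adaptation step follows the standard LoRA pattern: the frozen weight W is augmented with a trainable update (alpha/r) * B @ A of rank r. A minimal generic sketch (one linear layer, not the authors' code):

```python
import numpy as np

rng = np.random.default_rng(0)
d_out, d_in, r, alpha = 64, 64, 4, 8

W = rng.standard_normal((d_out, d_in))       # frozen pretrained weight
A = rng.standard_normal((r, d_in)) * 0.01    # trainable, rank r
B = np.zeros((d_out, r))                     # trainable, zero-init

def lora_forward(x):
    """Frozen path plus scaled low-rank update; only A and B are trained."""
    return W @ x + (alpha / r) * (B @ (A @ x))

x = rng.standard_normal(d_in)
print(np.allclose(lora_forward(x), W @ x))   # True: the update starts at zero
```

Because B is zero-initialised, fine-tuning starts exactly at the pretrained behaviour, and only the small A and B matrices need gradients, which is what makes adapting CLIP and LLM linear layers cheap.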
8. arXiv:2409.02070 (eess.IV, cs.CV) https://arxiv.org/abs/2409.02070
Explicit Differentiable Slicing and Global Deformation for Cardiac Mesh Reconstruction
Authors: Yihao Luo, Dario Sesia, Fanwen Wang, Yinzhe Wu, Wenhao Ding, Jiahao Huang, Fadong Shi, Anoop Shah, Amit Kaural, Jamil Mayet, Guang Yang, ChoonHwai Yap
Abstract: Mesh reconstruction of the cardiac anatomy from medical images is useful for shape and motion measurements and biophysics simulations to facilitate the assessment of cardiac function and health. However, 3D medical images are often acquired as 2D slices that are sparsely sampled and noisy, and mesh reconstruction on such data is a challenging task. Traditional voxel-based approaches rely on pre- and post-processing that compromises image fidelity, while mesh-level deep learning approaches require mesh annotations that are difficult to obtain. Therefore, direct cross-domain supervision from 2D images to meshes is a key technique for advancing 3D learning in medical imaging, but it has not been well developed. While there have been attempts to approximate the slicing of optimized meshes, few existing methods directly use 2D slices to supervise mesh reconstruction in a differentiable manner. Here, we propose a novel explicit differentiable voxelization and slicing (DVS) algorithm that allows gradient backpropagation to a mesh from its slices, facilitating refined mesh optimization directly supervised by losses defined on 2D images. Further, we propose an innovative framework for extracting patient-specific left ventricle (LV) meshes from medical images by coupling DVS with a graph harmonic deformation (GHD) mesh morphing descriptor of cardiac shape that naturally preserves mesh quality and smoothness during optimization. Experimental results demonstrate that our method achieves state-of-the-art performance in cardiac mesh reconstruction tasks from CT and MRI, with an overall Dice score of 90% on multiple datasets, outperforming existing approaches. The proposed method can further quantify clinically useful parameters such as ejection fraction and global myocardial strains, closely matching the ground truth and surpassing the traditional voxel-based approach in sparse images.
Submitted 20 October, 2024; v1 submitted 3 September, 2024; originally announced September 2024.
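The core trick of differentiable slicing, a loss defined on a 2D slice whose gradient reaches 3D geometry, can be sketched with a soft kernel. This illustration splats bare points instead of voxelizing an actual mesh, so it only conveys the idea behind DVS, not the paper's algorithm.

```python
import torch

verts = torch.randn(200, 3, requires_grad=True)      # candidate 3D positions
xs = ys = torch.linspace(-1, 1, 32)
gy, gx = torch.meshgrid(ys, xs, indexing="ij")       # pixel grid of the slice

def soft_slice(v, z0=0.0, sigma=0.08):
    """Render points into a soft occupancy image on the plane z = z0."""
    d2 = ((gx[None] - v[:, 0, None, None]) ** 2
          + (gy[None] - v[:, 1, None, None]) ** 2
          + (z0 - v[:, 2, None, None]) ** 2)
    return torch.exp(-d2 / (2 * sigma**2)).sum(0).clamp(max=1.0)

target = (gx**2 + gy**2 < 0.25).float()              # a disc as the 2D "label"
loss = ((soft_slice(verts) - target) ** 2).mean()    # 2D image-space loss
loss.backward()                                      # gradients reach the 3D points
print(verts.grad.abs().mean() > 0)                   # tensor(True)
```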
9. arXiv:2409.01544 (eess.IV, cs.CV) https://arxiv.org/abs/2409.01544
Learning Task-Specific Sampling Strategy for Sparse-View CT Reconstruction
Authors: Liutao Yang, Jiahao Huang, Yingying Fang, Angelica I Aviles-Rivero, Carola-Bibiane Schonlieb, Daoqiang Zhang, Guang Yang
Abstract: Sparse-View Computed Tomography (SVCT) offers low-dose and fast imaging but suffers from severe artifacts. Optimizing the sampling strategy is an essential approach to improving the imaging quality of SVCT. However, current methods typically optimize a universal sampling strategy for all types of scans, overlooking the fact that the optimal strategy may vary depending on the specific scanning task, whether it involves particular body scans (e.g., chest CT scans) or downstream clinical applications (e.g., disease diagnosis). The optimal strategy for one scanning task may not perform as well when applied to other tasks. To address this problem, we propose a deep learning framework that learns task-specific sampling strategies with a multi-task approach, training a unified reconstruction network while tailoring an optimal sampling strategy for each individual task. Thus, a task-specific sampling strategy can be applied to each type of scan to improve the quality of SVCT imaging and further assist the performance of downstream clinical applications. Extensive experiments across different scanning types validate the effectiveness of task-specific sampling strategies in enhancing imaging quality. Experiments involving downstream tasks verify the clinical value of the learned sampling strategies, as evidenced by notable improvements in downstream task performance. Furthermore, the utilization of a multi-task framework with a shared reconstruction network facilitates deployment on current imaging devices with switchable task-specific modules, and allows new tasks to be integrated easily without retraining the entire model.
Submitted 2 September, 2024; originally announced September 2024.
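One common way to make a discrete view-selection mask trainable, in the spirit of learned sampling strategies, is a straight-through top-k relaxation: the forward pass uses a hard 0/1 mask, while gradients flow through a soft surrogate. A generic sketch; the paper's strategy learner may differ.

```python
import torch

n_views, k = 180, 30                               # full angle set vs. sparse budget
scores = torch.zeros(n_views, requires_grad=True)  # learnable per-angle logits

def sample_mask(scores):
    hard = torch.zeros_like(scores)
    hard[scores.topk(k).indices] = 1.0             # forward: hard top-k selection
    soft = torch.sigmoid(scores)
    return hard + soft - soft.detach()             # backward: soft gradients

mask = sample_mask(scores)
sino = torch.randn(n_views, 256)                   # stand-in sinogram (views x detectors)
recon_input = mask[:, None] * sino                 # only selected views reach the network
loss = recon_input.pow(2).mean()                   # stand-in for a task-specific loss
loss.backward()
print(scores.grad.abs().sum() > 0)                 # tensor(True): selection is trainable
```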
10. arXiv:2409.00587 (cs.SD, cs.CV, eess.AS) https://arxiv.org/abs/2409.00587
FLUX that Plays Music
Authors: Zhengcong Fei, Mingyuan Fan, Changqian Yu, Junshi Huang
Abstract: This paper explores a simple extension of diffusion-based rectified flow Transformers for text-to-music generation, termed FluxMusic. Generally, following the design of the advanced Flux model (https://github.com/black-forest-labs/flux), we transfer it into a latent VAE space of the mel-spectrogram. It involves first applying a sequence of independent attention operations to the double text-music stream, followed by a stacked single music stream for denoised patch prediction. We employ multiple pre-trained text encoders to sufficiently capture caption semantic information as well as inference flexibility. In between, coarse textual information, in conjunction with time step embeddings, is utilized in a modulation mechanism, while fine-grained textual details are concatenated with the music patch sequence as inputs. Through an in-depth study, we demonstrate that rectified flow training with an optimized architecture significantly outperforms established diffusion methods for the text-to-music task, as evidenced by various automatic metrics and human preference evaluations. Our experimental data, code, and model weights are made publicly available at https://github.com/feizc/FluxMusic.
Submitted 31 August, 2024; originally announced September 2024.
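The rectified-flow objective underlying such models is compact enough to sketch end to end: draw noise x0 and data x1, interpolate x_t = (1-t)*x0 + t*x1, and regress a velocity field onto the straight-line target x1 - x0. A toy vector-space version with a small MLP; the paper trains a Transformer on mel-spectrogram VAE latents.

```python
import torch

dim = 16
model = torch.nn.Sequential(torch.nn.Linear(dim + 1, 64), torch.nn.SiLU(),
                            torch.nn.Linear(64, dim))
opt = torch.optim.Adam(model.parameters(), lr=1e-3)

for step in range(200):
    x1 = torch.randn(128, dim) * 0.5 + 2.0   # stand-in "data" distribution
    x0 = torch.randn(128, dim)               # noise
    t = torch.rand(128, 1)
    xt = (1 - t) * x0 + t * x1               # straight-line interpolation
    v = model(torch.cat([xt, t], dim=1))     # predicted velocity at (x_t, t)
    loss = ((v - (x1 - x0)) ** 2).mean()     # flow-matching regression target
    opt.zero_grad(); loss.backward(); opt.step()
print(f"final flow-matching loss: {loss.item():.3f}")
```

Sampling then integrates the learned velocity field from noise to data along (near-)straight paths, which is why rectified flow needs fewer steps than standard diffusion.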
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.00587v1-abstract-full').style.display = 'none'; document.getElementById('2409.00587v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.15481">arXiv:2408.15481</a> <span> [<a href="https://arxiv.org/pdf/2408.15481">pdf</a>, <a href="https://arxiv.org/ps/2408.15481">ps</a>, <a href="https://arxiv.org/format/2408.15481">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Joint Offloading and Beamforming Design in Integrating Sensing, Communication, and Computing Systems: A Distributed Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Liu%2C+P">Peng Liu</a>, <a href="/search/eess?searchtype=author&query=Fei%2C+Z">Zesong Fei</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+X">Xinyi Wang</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+J">Jingxuan Huang</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+J">Jie Hu</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+J+A">J. Andrew Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.15481v1-abstract-short" style="display: inline;"> When applying integrated sensing and communications (ISAC) in future mobile networks, many sensing tasks have low latency requirements, preferably being implemented at terminals. However, terminals often have limited computing capabilities and energy supply. In this paper, we investigate the effectiveness of leveraging the advanced computing capabilities of mobile edge computing (MEC) servers and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.15481v1-abstract-full').style.display = 'inline'; document.getElementById('2408.15481v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.15481v1-abstract-full" style="display: none;"> When applying integrated sensing and communications (ISAC) in future mobile networks, many sensing tasks have low latency requirements, preferably being implemented at terminals. However, terminals often have limited computing capabilities and energy supply. In this paper, we investigate the effectiveness of leveraging the advanced computing capabilities of mobile edge computing (MEC) servers and the cloud server to address the sensing tasks of ISAC terminals. Specifically, we propose a novel three-tier integrated sensing, communication, and computing (ISCC) framework composed of one cloud server, multiple MEC servers, and multiple terminals, where the terminals can optionally offload sensing data to the MEC server or the cloud server. The offload message is sent via the ISAC waveform, whose echo is used for sensing. 
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 12 figures, submitted to IEEE journals for possible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.14340">arXiv:2408.14340</a> <span> [<a href="https://arxiv.org/pdf/2408.14340">pdf</a>, <a href="https://arxiv.org/format/2408.14340">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Foundation Models for Music: A Survey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Ma%2C+Y">Yinghao Ma</a>, <a href="/search/eess?searchtype=author&query=%C3%98land%2C+A">Anders Øland</a>, <a href="/search/eess?searchtype=author&query=Ragni%2C+A">Anton Ragni</a>, <a href="/search/eess?searchtype=author&query=Del+Sette%2C+B+M">Bleiz MacSen Del Sette</a>, <a href="/search/eess?searchtype=author&query=Saitis%2C+C">Charalampos Saitis</a>, <a href="/search/eess?searchtype=author&query=Donahue%2C+C">Chris Donahue</a>, <a href="/search/eess?searchtype=author&query=Lin%2C+C">Chenghua Lin</a>, <a href="/search/eess?searchtype=author&query=Plachouras%2C+C">Christos Plachouras</a>, <a href="/search/eess?searchtype=author&query=Benetos%2C+E">Emmanouil Benetos</a>, <a href="/search/eess?searchtype=author&query=Shatri%2C+E">Elona Shatri</a>, <a href="/search/eess?searchtype=author&query=Morreale%2C+F">Fabio Morreale</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+G">Ge Zhang</a>, <a href="/search/eess?searchtype=author&query=Fazekas%2C+G">György Fazekas</a>,
<a href="/search/eess?searchtype=author&query=Xia%2C+G">Gus Xia</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+H">Huan Zhang</a>, <a href="/search/eess?searchtype=author&query=Manco%2C+I">Ilaria Manco</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+J">Jiawen Huang</a>, <a href="/search/eess?searchtype=author&query=Guinot%2C+J">Julien Guinot</a>, <a href="/search/eess?searchtype=author&query=Lin%2C+L">Liwei Lin</a>, <a href="/search/eess?searchtype=author&query=Marinelli%2C+L">Luca Marinelli</a>, <a href="/search/eess?searchtype=author&query=Lam%2C+M+W+Y">Max W. Y. Lam</a>, <a href="/search/eess?searchtype=author&query=Sharma%2C+M">Megha Sharma</a>, <a href="/search/eess?searchtype=author&query=Kong%2C+Q">Qiuqiang Kong</a>, <a href="/search/eess?searchtype=author&query=Dannenberg%2C+R+B">Roger B. Dannenberg</a>, <a href="/search/eess?searchtype=author&query=Yuan%2C+R">Ruibin Yuan</a> , et al. (17 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> In recent years, foundation models (FMs) such as large language models (LLMs) and latent diffusion models (LDMs) have profoundly impacted diverse sectors, including music. This comprehensive review examines state-of-the-art (SOTA) pre-trained models and foundation models in music, spanning representation learning, generative learning, and multimodal learning. We first contextualise the significance of music in various industries and trace the evolution of AI in music. By delineating the modalities targeted by foundation models, we find that many music representations are underexplored in FM development. We then highlight the limited versatility of previous methods across diverse music applications, along with the potential of FMs in music understanding, generation, and medical applications. By comprehensively exploring the details of the model pre-training paradigm, architectural choices, tokenisation, finetuning methodologies, and controllability, we emphasise important topics that deserve thorough exploration, such as instruction tuning and in-context learning, scaling laws and emergent abilities, as well as long-sequence modelling. A dedicated section presents insights into music agents, accompanied by a thorough analysis of the datasets and evaluations essential for pre-training and downstream tasks. Finally, by underscoring the vital importance of ethical considerations, we advocate that future research on FMs for music should focus more on issues such as interpretability, transparency, human responsibility, and copyright.
The paper offers insights into future challenges and trends for FMs in music, aiming to shape the trajectory of human-AI collaboration in the music realm. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.07349">arXiv:2408.07349</a> <span> [<a href="https://arxiv.org/pdf/2408.07349">pdf</a>, <a href="https://arxiv.org/format/2408.07349">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> </div> <p class="title is-5 mathjax"> Automated Retinal Image Analysis and Medical Report Generation through Deep Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Huang%2C+J">Jia-Hong Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> The increasing prevalence of retinal diseases poses a significant challenge to the healthcare system, as the demand for ophthalmologists surpasses the available workforce. This imbalance creates a bottleneck in diagnosis and treatment, potentially delaying critical care. Traditional methods of generating medical reports from retinal images rely on manual interpretation, which is time-consuming and prone to errors, further straining ophthalmologists' limited resources. This thesis investigates the potential of Artificial Intelligence (AI) to automate medical report generation for retinal images. AI can quickly analyze large volumes of image data, identifying subtle patterns essential for accurate diagnosis. By automating this process, AI systems can greatly enhance the efficiency of retinal disease diagnosis, reducing doctors' workloads and enabling them to focus on more complex cases.
The proposed AI-based methods address key challenges in automated report generation: (1) improved methods for medical keyword representation enhance the system's ability to capture nuances in medical terminology; (2) a multi-modal deep learning approach captures interactions between textual keywords and retinal images, resulting in more comprehensive medical reports; and (3) techniques that enhance the interpretability of the AI-based report generation system foster trust and acceptance in clinical practice. These methods are rigorously evaluated using various metrics and achieve state-of-the-art performance. This thesis demonstrates AI's potential to revolutionize retinal disease diagnosis by automating medical report generation, ultimately improving clinical efficiency, diagnostic accuracy, and patient care. [https://github.com/Jhhuangkay/DeepOpht-Medical-Report-Generation-for-Retinal-Images-via-Deep-Models-and-Visual-Explanation] </span> </p>
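<p class="is-size-7">A hedged sketch of the keyword-image interaction idea in point (2): medical keyword embeddings attend over image region features before conditioning a report decoder. All module names and shapes here are hypothetical, not the thesis code.</p> <pre><code>import torch
import torch.nn as nn

class KeywordImageFusion(nn.Module):
    """Sketch: fuse medical keyword embeddings with retinal image features."""
    def __init__(self, vocab_size, dim=256, heads=4):
        super().__init__()
        self.kw_embed = nn.Embedding(vocab_size, dim)            # keyword table
        self.attn = nn.MultiheadAttention(dim, heads, batch_first=True)

    def forward(self, img_feats, keyword_ids):
        # img_feats: (B, regions, dim); keyword_ids: (B, K)
        kw = self.kw_embed(keyword_ids)                          # (B, K, dim)
        fused, _ = self.attn(kw, img_feats, img_feats)           # keywords attend to regions
        return fused                                             # conditions a report decoder

fusion = KeywordImageFusion(vocab_size=1000)
out = fusion(torch.randn(2, 49, 256), torch.randint(0, 1000, (2, 5)))
</code></pre>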
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Ph.D. thesis, 124 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.04708">arXiv:2408.04708</a> <span> [<a href="https://arxiv.org/pdf/2408.04708">pdf</a>, <a href="https://arxiv.org/format/2408.04708">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> MulliVC: Multi-lingual Voice Conversion With Cycle Consistency </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Huang%2C+J">Jiawei Huang</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+C">Chen Zhang</a>, <a href="/search/eess?searchtype=author&query=Ren%2C+Y">Yi Ren</a>, <a href="/search/eess?searchtype=author&query=Jiang%2C+Z">Ziyue Jiang</a>, <a href="/search/eess?searchtype=author&query=Ye%2C+Z">Zhenhui Ye</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+J">Jinglin Liu</a>, <a href="/search/eess?searchtype=author&query=He%2C+J">Jinzheng He</a>, <a href="/search/eess?searchtype=author&query=Yin%2C+X">Xiang Yin</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+Z">Zhou Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Voice conversion aims to modify the source speaker's voice to resemble the target speaker while preserving the original speech content. Despite recent notable advancements, multi-lingual voice conversion (covering both monolingual and cross-lingual scenarios) has yet to be extensively studied. It faces two main challenges: 1) the considerable variability in prosody and articulation habits across languages; and 2) the rarity of paired multi-lingual datasets from the same speaker. In this paper, we propose MulliVC, a novel voice conversion system that converts only timbre and keeps the original content and source-language prosody without requiring multi-lingual paired data. Specifically, each training step of MulliVC contains three substeps: in step one, the model is trained with monolingual speech data; steps two and three then take inspiration from back translation and construct a cyclical process to disentangle timbre from the remaining information (content, prosody, and other language-related information) in the absence of multi-lingual data from the same speaker. Both objective and subjective results indicate that MulliVC significantly surpasses other methods in both monolingual and cross-lingual contexts, demonstrating the system's efficacy and the viability of the three-step approach with cycle consistency. Audio samples can be found on our demo page (mullivc.github.io). </span> </p>
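<p class="is-size-7">A schematic of the back-translation-style cycle described above, written as a loss function over a hypothetical voice-conversion callable <code>vc(wav, ref=...)</code>; this is an editor's sketch of the cycle-consistency pattern, not the MulliVC training code.</p> <pre><code>def mullivc_like_step(vc, spk_a_wav, spk_b_ref, recon_loss):
    """Sketch of one training step with a cycle-consistency term."""
    # Sub-step 1: ordinary monolingual reconstruction.
    loss_recon = recon_loss(vc(spk_a_wav, ref=spk_a_wav), spk_a_wav)
    # Sub-steps 2-3: convert A's speech to speaker B's timbre, then back to A.
    # Content and prosody should survive the round trip; only timbre changes.
    fake_b = vc(spk_a_wav, ref=spk_b_ref)
    cycle_a = vc(fake_b, ref=spk_a_wav)
    loss_cycle = recon_loss(cycle_a, spk_a_wav)
    return loss_recon + loss_cycle
</code></pre>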
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.21328">arXiv:2407.21328</a> <span> [<a href="https://arxiv.org/pdf/2407.21328">pdf</a>, <a href="https://arxiv.org/format/2407.21328">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Knowledge-Guided Prompt Learning for Lifespan Brain MR Image Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Teng%2C+L">Lin Teng</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+Z">Zihao Zhao</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+J">Jiawei Huang</a>, <a href="/search/eess?searchtype=author&query=Cao%2C+Z">Zehong Cao</a>, <a href="/search/eess?searchtype=author&query=Meng%2C+R">Runqi Meng</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+F">Feng Shi</a>, <a href="/search/eess?searchtype=author&query=Shen%2C+D">Dinggang Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Automatic and accurate segmentation of brain MR images throughout the human lifespan into tissue and structure is crucial for understanding brain development and diagnosing diseases. However, challenges arise from the intricate variations in brain appearance due to rapid early brain development, aging, and disorders, compounded by the limited availability of manually-labeled datasets. In response, we present a two-step segmentation framework employing Knowledge-Guided Prompt Learning (KGPL) for brain MRI. Specifically, we first pre-train segmentation models on large-scale datasets with sub-optimal labels, followed by the incorporation of knowledge-driven embeddings learned from image-text alignment into the models. The introduction of knowledge-wise prompts captures semantic relationships between anatomical variability and biological processes, enabling models to learn structural feature embeddings across diverse age groups. Experimental findings demonstrate the superiority and robustness of our proposed method, particularly noticeable when employing Swin UNETR as the backbone. Our approach achieves average DSC values of 95.17% and 94.19% for brain tissue and structure segmentation, respectively. Our code is available at https://github.com/TL9792/KGPL. </span> </p>
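<p class="is-size-7">A minimal sketch of one plausible way to inject knowledge-driven embeddings as prompt tokens into a token-based segmentation encoder; shapes, the projection, and the prompt count are all assumptions, and the paper's actual KGPL wiring may differ.</p> <pre><code>import torch
import torch.nn as nn

class PromptedEncoder(nn.Module):
    """Sketch: prepend projected knowledge embeddings to visual tokens."""
    def __init__(self, encoder, text_dim, dim):
        super().__init__()
        self.encoder = encoder                       # any token-sequence encoder
        self.project = nn.Linear(text_dim, dim)      # image-text embeddings -> prompts

    def forward(self, tokens, knowledge_emb):
        # tokens: (B, N, dim); knowledge_emb: (B, P, text_dim)
        prompts = self.project(knowledge_emb)        # (B, P, dim) prompt tokens
        return self.encoder(torch.cat([prompts, tokens], dim=1))
</code></pre>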
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.20955">arXiv:2407.20955</a> <span> [<a href="https://arxiv.org/pdf/2407.20955">pdf</a>, <a href="https://arxiv.org/format/2407.20955">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Emotion-driven Piano Music Generation via Two-stage Disentanglement and Functional Representation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Huang%2C+J">Jingyue Huang</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+K">Ke Chen</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+Y">Yi-Hsuan Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Managing the emotional aspect remains a challenge in automatic music generation. Prior works aim to learn various emotions at once, leading to inadequate modeling. This paper explores the disentanglement of emotions in piano performance generation through a two-stage framework. The first stage focuses on valence modeling of the lead sheet, and the second stage addresses arousal modeling by introducing performance-level attributes. To further capture features that shape valence, an aspect less explored by previous approaches, we introduce a novel functional representation of symbolic music. This representation aims to capture the emotional impact of major-minor tonality, as well as the interactions among notes, chords, and key signatures. Objective and subjective experiments validate the effectiveness of our framework in both emotional valence and arousal modeling. We further leverage our framework in a novel application of emotional controls, showing broad potential for emotion-driven music generation. </span> </p>
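<p class="is-size-7">A toy illustration of the core idea behind a key-aware "functional" encoding: represent each note relative to the key's tonic rather than as an absolute pitch, so major-minor tonality becomes explicit. The token format and key table here are editor's assumptions, not the paper's exact vocabulary.</p> <pre><code># Encode MIDI pitches as scale degrees relative to the key's tonic (sketch).
KEY_ROOT = {'C': 0, 'G': 7, 'D': 2, 'A': 9, 'E': 4, 'F': 5}  # extend as needed

def functional_token(midi_pitch, key='C'):
    degree = (midi_pitch - KEY_ROOT[key]) % 12   # chromatic degree above tonic
    octave = midi_pitch // 12 - 1                # standard MIDI octave numbering
    return f"deg{degree}_oct{octave}"

print(functional_token(64, 'C'))  # E above middle C -> 'deg4_oct4'
print(functional_token(64, 'E'))  # same note is the tonic in E -> 'deg0_oct4'
</code></pre>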
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Proceedings of the 25th International Society for Music Information Retrieval Conference, ISMIR 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.20176">arXiv:2407.20176</a> <span> [<a href="https://arxiv.org/pdf/2407.20176">pdf</a>, <a href="https://arxiv.org/format/2407.20176">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Emotion-Driven Melody Harmonization via Melodic Variation and Functional Representation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Huang%2C+J">Jingyue Huang</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+Y">Yi-Hsuan Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Emotion-driven melody harmonization aims to generate diverse harmonies for a single melody to convey desired emotions. Previous research found it hard to alter the perceived emotional valence of lead sheets only by harmonizing the same melody with different chords, which may be attributed to the constraints imposed by the melody itself and the limitations of existing music representations. In this paper, we propose a novel functional representation for symbolic music. This new method takes musical keys into account, recognizing their significant role in shaping music's emotional character through major-minor tonality. It also allows for melodic variation with respect to keys and addresses the problem of data scarcity for better emotion modeling. A Transformer is employed to harmonize key-adaptable melodies, allowing for keys determined in either a rule-based or a model-based manner. Experimental results confirm the effectiveness of our new representation in generating key-aware harmonies, with objective and subjective evaluations affirming the potential of our approach to convey specific valence for versatile melodies. </span> </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This work is the initial version of the ISMIR 2024 paper EMO-Disentanger</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.14754">arXiv:2407.14754</a> <span> [<a href="https://arxiv.org/pdf/2407.14754">pdf</a>, <a href="https://arxiv.org/format/2407.14754">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Representing Topological Self-Similarity Using Fractal Feature Maps for Accurate Segmentation of Tubular Structures </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Huang%2C+J">Jiaxing Huang</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+Y">Yanfeng Zhou</a>, <a href="/search/eess?searchtype=author&query=Luo%2C+Y">Yaoru Luo</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+G">Guole Liu</a>, <a href="/search/eess?searchtype=author&query=Guo%2C+H">Heng Guo</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+G">Ge Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Accurate segmentation of long and thin tubular structures is required in a wide variety of areas such as biology, medicine, and remote sensing. The complex topology and geometry of such structures often pose significant technical challenges. A fundamental property of such structures is their topological self-similarity, which can be quantified by fractal features such as fractal dimension (FD). In this study, we incorporate fractal features into a deep learning model by extending FD to the pixel level using a sliding-window technique. The resulting fractal feature maps (FFMs) are then incorporated as an additional input to the model and an additional weight in the loss function to enhance segmentation performance by exploiting topological self-similarity. Moreover, we extend the U-Net architecture by incorporating an edge decoder and a skeleton decoder to improve the boundary accuracy and skeletal continuity of segmentation, respectively. Extensive experiments on five tubular-structure datasets validate the effectiveness and robustness of our approach. Furthermore, integrating FFMs with other popular segmentation models such as HR-Net also yields performance gains, suggesting that FFM can serve as a plug-in module with different model architectures. Code and data are openly accessible at https://github.com/cbmi-group/FFM-Multi-Decoder-Network. </span> </p>
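<p class="is-size-7">A plausible reading of the FFM construction: estimate a fractal dimension per window via box counting and tile the results into a feature map. Window size, box scales, and the binary-mask input are assumptions; the paper's exact estimator may differ.</p> <pre><code>import numpy as np

def box_count_fd(patch):
    """Estimate fractal dimension of a binary patch by box counting."""
    sizes = [2, 4, 8]
    counts = []
    n = patch.shape[0]
    for s in sizes:
        crop = patch[:n - n % s, :n - n % s]
        view = crop.reshape(n // s, s, -1, s)
        boxes = view.any(axis=(1, 3)).sum()        # occupied boxes at scale s
        counts.append(max(boxes, 1))               # guard log(0)
    # FD is the slope of log(count) versus log(1/size)
    slope, _ = np.polyfit(np.log(1.0 / np.array(sizes)), np.log(counts), 1)
    return slope

def fractal_feature_map(mask, win=16):
    """Slide a window over the mask and compute per-window FD."""
    h, w = mask.shape
    ffm = np.zeros((h // win, w // win))
    for i in range(0, h - win + 1, win):
        for j in range(0, w - win + 1, win):
            ffm[i // win, j // win] = box_count_fd(mask[i:i + win, j:j + win])
    return ffm
</code></pre>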
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.05749">arXiv:2407.05749</a> <span> [<a href="https://arxiv.org/pdf/2407.05749">pdf</a>, <a href="https://arxiv.org/format/2407.05749">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> LDGCN: An Edge-End Lightweight Dual GCN Based on Single-Channel EEG for Driver Drowsiness Monitoring </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Huang%2C+J">Jingwei Huang</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+C">Chuansheng Wang</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+J">Jiayan Huang</a>, <a href="/search/eess?searchtype=author&query=Fan%2C+H">Haoyi Fan</a>, <a href="/search/eess?searchtype=author&query=Grau%2C+A">Antoni Grau</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+F">Fuquan Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
<span class="abstract-full has-text-grey-dark mathjax"> Driver drowsiness electroencephalography (EEG) signal monitoring can promptly alert drivers to their drowsiness status, thereby reducing the probability of traffic accidents. Graph convolutional networks (GCNs) have shown significant advancements in processing the non-stationary, time-varying, and non-Euclidean nature of EEG signals. However, the existing single-channel EEG adjacency graph construction process lacks interpretability, which hinders the ability of GCNs to effectively extract adjacency graph features, thus affecting the performance of drowsiness monitoring. To address this issue, we propose an edge-end lightweight dual graph convolutional network (LDGCN). Specifically, we are the first to incorporate neurophysiological knowledge to design a Baseline Drowsiness Status Adjacency Graph (BDSAG), which characterizes driver drowsiness status. Additionally, to express more features within limited EEG data, we introduce the Augmented Graph-level Module (AGM). This module captures global and local information at the graph level, ensuring that BDSAG features remain intact while enhancing effective feature expression capability. Furthermore, to deploy our method on the fourth-generation Raspberry Pi, we utilize Adaptive Pruning Optimization (APO) on both channels and neurons, reducing inference latency by almost half. Experiments on benchmark datasets demonstrate that LDGCN offers the best trade-off between monitoring performance and hardware resource utilization compared to existing state-of-the-art algorithms. All our source code can be found at https://github.com/BryantDom/Driver-Drowsiness-Monitoring. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li>
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.17618">arXiv:2406.17618</a> <span> [<a href="https://arxiv.org/pdf/2406.17618">pdf</a>, <a href="https://arxiv.org/format/2406.17618">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Towards Building an End-to-End Multilingual Automatic Lyrics Transcription Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Huang%2C+J">Jiawen Huang</a>, <a href="/search/eess?searchtype=author&query=Benetos%2C+E">Emmanouil Benetos</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Multilingual automatic lyrics transcription (ALT) is a challenging task due to the limited availability of labelled data and the challenges introduced by singing, compared to multilingual automatic speech recognition. Although some multilingual singing datasets have been released recently, English continues to dominate these collections. Multilingual ALT remains underexplored due to the scale of data and annotation quality. In this paper, we aim to create a multilingual ALT system with available datasets. Inspired by architectures that have proven effective for English ALT, we adapt these techniques to the multilingual scenario by expanding the target vocabulary set. We then evaluate the performance of the multilingual model in comparison to its monolingual counterparts. Additionally, we explore various conditioning methods to incorporate language information into the model. We perform analysis by language and combine it with language classification performance. Our findings reveal that the multilingual model performs consistently better than the monolingual models trained on the language subsets. Furthermore, we demonstrate that incorporating language information significantly enhances performance. </span> </p>
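<p class="is-size-7">A small sketch of the vocabulary-expansion step mentioned above: build one joint character vocabulary across the languages so a single end-to-end model can emit all of them. The special tokens and lowercasing are assumptions for illustration.</p> <pre><code># Build a joint character vocabulary across languages (sketch).
def build_joint_vocab(corpora):
    """corpora: dict mapping language code -> iterable of transcript strings."""
    charset = set()
    for lang, texts in corpora.items():
        for t in texts:
            charset.update(t.lower())      # shared, case-folded character set
    specials = ['[pad]', '[unk]', '[space]']
    return {ch: i for i, ch in enumerate(specials + sorted(charset))}

vocab = build_joint_vocab({'en': ['hello world'], 'es': ['hola mundo']})
print(len(vocab))   # union of characters from both languages, plus specials
</code></pre>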
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at EUSIPCO 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.17173">arXiv:2406.17173</a> <span> [<a href="https://arxiv.org/pdf/2406.17173">pdf</a>, <a href="https://arxiv.org/format/2406.17173">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Diff3Dformer: Leveraging Slice Sequence Diffusion for Enhanced 3D CT Classification with Transformer Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Jin%2C+Z">Zihao Jin</a>, <a href="/search/eess?searchtype=author&query=Fang%2C+Y">Yingying Fang</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+J">Jiahao Huang</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+C">Caiwen Xu</a>, <a href="/search/eess?searchtype=author&query=Walsh%2C+S">Simon Walsh</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+G">Guang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> The manifestation of symptoms associated with lung diseases can vary in depth for individual patients, highlighting the significance of 3D information in CT scans for medical image classification. While Vision Transformers have shown superior performance over convolutional neural networks in image classification tasks, their effectiveness is often demonstrated on sufficiently large 2D datasets, and they easily encounter overfitting on small medical image datasets. To address this limitation, we propose a Diffusion-based 3D Vision Transformer (Diff3Dformer), which utilizes the latent space of the diffusion model to form the slice sequence for 3D analysis and incorporates clustering attention into ViT to aggregate repetitive information within 3D CT scans, thereby harnessing the power of the advanced transformer in 3D classification tasks on small datasets. Our method exhibits improved performance on two small-scale datasets of 3D lung CT scans, surpassing state-of-the-art 3D methods and other transformer-based approaches that emerged during the COVID-19 pandemic, demonstrating its robust and superior performance across different scales of data. Experimental results underscore the superiority of our proposed method, indicating its potential for enhancing medical image classification tasks in real-world scenarios. </span> </p>
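<p class="is-size-7">A simplified sketch of the slice-sequence idea: per-slice features from a 3D volume are aggregated by attention pooling before classification. This uses a plain learned-query attention pool rather than the paper's clustering attention, and all shapes are assumptions.</p> <pre><code>import torch
import torch.nn as nn

class SliceAggregator(nn.Module):
    """Sketch: attention-pool per-slice features of a CT volume, then classify."""
    def __init__(self, dim=256, n_classes=2, heads=4):
        super().__init__()
        self.query = nn.Parameter(torch.randn(1, 1, dim))   # learned pooling query
        self.attn = nn.MultiheadAttention(dim, heads, batch_first=True)
        self.head = nn.Linear(dim, n_classes)

    def forward(self, slice_feats):                 # (B, n_slices, dim)
        q = self.query.expand(slice_feats.size(0), -1, -1)
        pooled, _ = self.attn(q, slice_feats, slice_feats)
        return self.head(pooled.squeeze(1))         # (B, n_classes)

logits = SliceAggregator()(torch.randn(2, 64, 256))
</code></pre>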
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.16026">arXiv:2406.16026</a> <span> [<a href="https://arxiv.org/pdf/2406.16026">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Medical Physics">physics.med-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> CEST-KAN: Kolmogorov-Arnold Networks for CEST MRI Data Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+J">Jiawen Wang</a>, <a href="/search/eess?searchtype=author&query=Cai%2C+P">Pei Cai</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Z">Ziyan Wang</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+H">Huabin Zhang</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+J">Jianpan Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Purpose: This study aims to propose and investigate the feasibility of using Kolmogorov-Arnold Network (KAN) for CEST MRI data analysis (CEST-KAN). Methods: CEST MRI data were acquired from twelve healthy volunteers at 3T. Data from ten subjects were used for training, while the remaining two were reserved for testing. The performance of multi-layer perceptron (MLP) and KAN models with the same network settings was evaluated and compared to the conventional multi-pool Lorentzian fitting (MPLF) method in generating water and multiple CEST contrasts, including amide, relayed nuclear Overhauser effect (rNOE), and magnetization transfer (MT). Results: The water and CEST maps generated by both MLP and KAN were visually comparable to the MPLF results. However, the KAN model demonstrated higher accuracy in extrapolating the CEST fitting metrics, as evidenced by the smaller validation loss during training and smaller absolute error during testing. Voxel-wise correlation analysis showed that all four CEST fitting metrics generated by KAN consistently exhibited higher Pearson coefficients than the MLP results, indicating superior performance. Moreover, the KAN models consistently outperformed the MLP models across varying numbers of hidden layers, despite longer training time. Conclusion: In this study, we demonstrated for the first time the feasibility of utilizing KAN for CEST MRI data analysis, highlighting its superiority over MLP in this task. The findings suggest that CEST-KAN has the potential to be a robust and reliable post-analysis tool for CEST MRI in clinical settings. </span> </p>
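<p class="is-size-7">A sketch of the multi-pool Lorentzian fitting (MPLF) baseline the study compares against: the Z-spectrum is modeled as one minus a sum of Lorentzian saturation pools. The pool offsets and widths below are common literature values used only as illustrative initial guesses.</p> <pre><code>import numpy as np
from scipy.optimize import curve_fit

def lorentzian(dw, amp, width, offset):
    """Single saturation pool centered at `offset` ppm."""
    return amp * (width / 2) ** 2 / ((width / 2) ** 2 + (dw - offset) ** 2)

def z_model(dw, *p):
    """Z-spectrum = 1 - sum of pools; p holds (amp, width, offset) triples."""
    z = np.zeros_like(dw)
    for i in range(0, len(p), 3):
        z += lorentzian(dw, *p[i:i + 3])
    return 1.0 - z

offsets = np.linspace(-6, 6, 61)   # saturation offsets in ppm
# water at 0, amide near 3.5, rNOE near -3.5, broad MT pool (initial guesses)
p0 = [0.8, 2.0, 0.0,  0.05, 2.0, 3.5,  0.05, 3.0, -3.5,  0.1, 25.0, -1.0]
# With a measured spectrum `z_meas`, fit per voxel:
# popt, _ = curve_fit(z_model, offsets, z_meas, p0=p0)
</code></pre>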
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li>
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.15222">arXiv:2406.15222</a> <span> [<a href="https://arxiv.org/pdf/2406.15222">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Rapid and Accurate Diagnosis of Acute Aortic Syndrome using Non-contrast CT: A Large-scale, Retrospective, Multi-center and AI-based Study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hu%2C+Y">Yujian Hu</a>, <a href="/search/eess?searchtype=author&query=Xiang%2C+Y">Yilang Xiang</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+Y">Yan-Jie Zhou</a>, <a href="/search/eess?searchtype=author&query=He%2C+Y">Yangyan He</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+S">Shifeng Yang</a>, <a href="/search/eess?searchtype=author&query=Du%2C+X">Xiaolong Du</a>, <a href="/search/eess?searchtype=author&query=Den%2C+C">Chunlan Den</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+Y">Youyao Xu</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+G">Gaofeng Wang</a>, <a href="/search/eess?searchtype=author&query=Ding%2C+Z">Zhengyao Ding</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+J">Jingyong Huang</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+W">Wenjun Zhao</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+X">Xuejun Wu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+D">Donglin Li</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+Q">Qianqian Zhu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Z">Zhenjiang Li</a>, <a href="/search/eess?searchtype=author&query=Qiu%2C+C">Chenyang Qiu</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Z">Ziheng Wu</a>, <a href="/search/eess?searchtype=author&query=He%2C+Y">Yunjun He</a>, <a href="/search/eess?searchtype=author&query=Tian%2C+C">Chen Tian</a>, <a href="/search/eess?searchtype=author&query=Qiu%2C+Y">Yihui Qiu</a>, <a href="/search/eess?searchtype=author&query=Lin%2C+Z">Zuodong Lin</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+X">Xiaolong Zhang</a>, <a href="/search/eess?searchtype=author&query=He%2C+Y">Yuan He</a>, <a href="/search/eess?searchtype=author&query=Yuan%2C+Z">Zhenpeng Yuan</a> , et al. (15 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax">
Chest pain symptoms are highly prevalent in emergency departments (EDs), where acute aortic syndrome (AAS) is a catastrophic cardiovascular emergency with a high fatality rate, especially when timely and accurate treatment is not administered. However, current triage practices in the ED can cause up to approximately half of patients with AAS to have an initially missed diagnosis or to be misdiagnosed as having other acute chest pain conditions. Subsequently, these AAS patients will undergo clinically inaccurate or suboptimal differential diagnosis. Fortunately, even under these suboptimal protocols, nearly all these patients underwent non-contrast CT covering the aorta anatomy at the early stage of differential diagnosis. In this study, we developed an artificial intelligence model (DeepAAS) using non-contrast CT, which is highly accurate for identifying AAS and provides interpretable results to assist in clinical decision-making. Performance was assessed in two major phases: a multi-center retrospective study (n = 20,750) and an exploration in real-world emergency scenarios (n = 137,525). In the multi-center cohort, DeepAAS achieved a mean area under the receiver operating characteristic curve of 0.958 (95% CI 0.950-0.967). In the real-world cohort, DeepAAS detected 109 AAS patients with misguided initial suspicion, achieving 92.6% (95% CI 76.2%-97.5%) in mean sensitivity and 99.2% (95% CI 99.1%-99.3%) in mean specificity. Our AI model performed well on non-contrast CT at all applicable early stages of differential diagnosis workflows, effectively reduced the overall missed diagnosis and misdiagnosis rate from 48.8% to 4.8%, and shortened the diagnosis time for patients with misguided initial suspicion from an average of 681.8 (74-11,820) mins to 68.5 (23-195) mins. DeepAAS could effectively fill the gap in the current clinical workflow without requiring additional tests. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li>
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.13788">arXiv:2406.13788</a> <span> [<a href="https://arxiv.org/pdf/2406.13788">pdf</a>, <a href="https://arxiv.org/format/2406.13788">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Groupwise Deformable Registration of Diffusion Tensor Cardiovascular Magnetic Resonance: Disentangling Diffusion Contrast, Respiratory and Cardiac Motions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+F">Fanwen Wang</a>, <a href="/search/eess?searchtype=author&query=Luo%2C+Y">Yihao Luo</a>, <a href="/search/eess?searchtype=author&query=Wen%2C+K">Ke Wen</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+J">Jiahao Huang</a>, <a href="/search/eess?searchtype=author&query=Ferreira%2C+P+F">Pedro F. Ferreira</a>, <a href="/search/eess?searchtype=author&query=Luo%2C+Y">Yaqing Luo</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Y">Yinzhe Wu</a>, <a href="/search/eess?searchtype=author&query=Munoz%2C+C">Camila Munoz</a>, <a href="/search/eess?searchtype=author&query=Pennell%2C+D+J">Dudley J. Pennell</a>, <a href="/search/eess?searchtype=author&query=Scott%2C+A+D">Andrew D. Scott</a>, <a href="/search/eess?searchtype=author&query=Nielles-Vallespin%2C+S">Sonia Nielles-Vallespin</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+G">Guang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Diffusion tensor based cardiovascular magnetic resonance (DT-CMR) offers a non-invasive method to visualize the myocardial microstructure. With the assumption that the heart is stationary, frames are acquired with multiple repetitions for different diffusion encoding directions. However, motion from poor breath-holding and imprecise cardiac triggering complicates DT-CMR analysis, which is further challenged by its inherently low SNR, varied contrasts, and diffusion-induced textures. Our solution is a novel framework employing groupwise registration with an implicit template to isolate respiratory and cardiac motions, while a tensor-embedded branch preserves diffusion contrast textures. We have devised a loss refinement tailored for non-linear least squares fitting and low SNR conditions. Additionally, we introduce new physics-based and clinical metrics for performance evaluation. Access code and supplementary materials at: https://github.com/ayanglab/DTCMR-Reg </span> </p>
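<p class="is-size-7">A minimal sketch of the groupwise-with-implicit-template idea: warp every frame toward a shared mean and penalize cross-frame variance, so no single frame is privileged as the reference. Tensors and shapes are hypothetical; the paper's framework adds tensor-aware branches and a refined fitting loss on top of this.</p> <pre><code>import torch

def groupwise_variance_loss(warped_frames):
    """warped_frames: (N, H, W) frames after each predicted deformation.

    The implicit template is simply the running mean of the warped group;
    minimizing variance around it pulls all frames into alignment.
    """
    template = warped_frames.mean(dim=0, keepdim=True)   # implicit template
    return ((warped_frames - template) ** 2).mean()

loss = groupwise_variance_loss(torch.randn(12, 64, 64))
</code></pre>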
arXiv:2406.13708 [pdf] https://arxiv.org/abs/2406.13708
Subjects: Image and Video Processing (eess.IV); Medical Physics (physics.med-ph)
Title: Low-rank based motion correction followed by automatic frame selection in DT-CMR
Authors: Fanwen Wang, Pedro F. Ferreira, Camila Munoz, Ke Wen, Yaqing Luo, Jiahao Huang, Yinzhe Wu, Dudley J. Pennell, Andrew D. Scott, Sonia Nielles-Vallespin, Guang Yang
Abstract: Motivation: Post-processing of in-vivo diffusion tensor CMR (DT-CMR) is challenging due to the low SNR, the variation in contrast between frames that makes image registration difficult, and the need to manually reject frames corrupted by motion. Goals: To develop a semi-automatic post-processing pipeline for robust DT-CMR registration and automatic frame selection. Approach: We used low-intrinsic-rank averaged frames as the reference to register other low-ranked frames. A myocardium-guided frame selection rejected the frames with signal loss, through-plane motion and poor registration. Results: The proposed method outperformed our previous noise-robust rigid registration on helix angle data quality and reduced negative eigenvalues in healthy volunteers.
Submitted 19 June, 2024; originally announced June 2024.
Comments: Accepted as ISMRM 2024 Digital poster 2141
Journal ref: ISMRM 2024 Digital poster 2141
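The low-rank idea can be illustrated compactly: a truncated SVD of the frame stack keeps the dominant shared structure while suppressing frame-to-frame noise and contrast variation, and averaging the low-ranked frames yields a cleaner registration reference. A minimal NumPy sketch under those assumptions (the rank and data are illustrative, not the paper's settings):

```python
import numpy as np

def low_rank_reference(frames: np.ndarray, rank: int = 1) -> np.ndarray:
    """Build a denoised reference image from a (n_frames, H, W) stack.

    The rank-r SVD reconstruction keeps the dominant shared structure
    and suppresses independent per-frame noise before averaging.
    """
    n, h, w = frames.shape
    X = frames.reshape(n, h * w)
    U, s, Vt = np.linalg.svd(X, full_matrices=False)
    X_lr = (U[:, :rank] * s[:rank]) @ Vt[:rank]   # truncated reconstruction
    return X_lr.mean(axis=0).reshape(h, w)        # average the low-ranked frames

rng = np.random.default_rng(1)
stack = np.tile(rng.random((32, 32)), (10, 1, 1)) + 0.2 * rng.standard_normal((10, 32, 32))
ref = low_rank_reference(stack, rank=1)
print(ref.shape)
```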
arXiv:2406.00993 [pdf] https://arxiv.org/abs/2406.00993
Subjects: Signal Processing (eess.SP); Human-Computer Interaction (cs.HC); Other Quantitative Biology (q-bio.OT)
Title: Detection of Acetone as a Gas Biomarker for Diabetes Based on Gas Sensor Technology
Authors: Jiaming Wei, Tong Liu, Jipeng Huang, Xiaowei Li, Yurui Qi, Gangyin Luo
Abstract: With the continuous development and improvement of medical services, there is a growing demand for improved diabetes diagnosis. Exhaled breath analysis, characterized by its speed, convenience, and non-invasive nature, is leading the trend in diagnostic development. Studies have shown that acetone levels in the breath of diabetes patients are higher than normal, making acetone a basis for diabetes breath analysis and providing a more readily accepted method for early diabetes prevention and monitoring. Addressing issues such as the invasive nature, disease transmission risks, and complexity of diabetes testing, this study aims to design a detection system for acetone, a gas biomarker of diabetes, centered around a sensor array combining gas sensors and pattern recognition algorithms. The research covers sensor selection, sensor preparation, circuit design, data acquisition and processing, and detection model establishment to accurately identify acetone. Titanium dioxide was chosen as the nano gas-sensitive material to prepare the acetone gas sensor, with data collection conducted using an STM32 microcontroller. Filtering was applied to the raw sensor data, followed by feature extraction using principal component analysis. A recognition model based on the support vector machine algorithm was used for qualitative identification of gas samples, while a recognition model based on a backpropagation neural network was employed for quantitative detection of gas sample concentrations. Experimental results demonstrated recognition accuracies of 96% and 97.5% for acetone-ethanol and acetone-methanol mixed gases, and 90% for ternary acetone, ethanol, and methanol mixed gases.
Submitted 3 June, 2024; originally announced June 2024.
Comments: 9 pages, 14 figures
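The processing chain described here (filtering, PCA feature extraction, an SVM for qualitative gas identification, and a backpropagation network for quantitative concentration) maps directly onto standard scikit-learn components. A hedged sketch on synthetic sensor-array data; the feature dimensions and targets are invented for illustration:

```python
import numpy as np
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Synthetic sensor-array responses: 120 samples x 18 extracted features.
rng = np.random.default_rng(0)
X = rng.random((120, 18))              # hypothetical feature matrix
y_gas = rng.integers(0, 3, size=120)   # gas identity (e.g., acetone/ethanol/methanol)
y_ppm = rng.random(120) * 10           # hypothetical concentration target

# Qualitative identification: PCA features feeding an SVM classifier.
clf = make_pipeline(StandardScaler(), PCA(n_components=5), SVC(kernel="rbf"))
clf.fit(X, y_gas)

# Quantitative concentration: a backpropagation-trained MLP regressor.
reg = make_pipeline(StandardScaler(),
                    MLPRegressor(hidden_layer_sizes=(32,), max_iter=2000, random_state=0))
reg.fit(X, y_ppm)
print(clf.score(X, y_gas), round(reg.score(X, y_ppm), 2))
```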
arXiv:2406.00320 [pdf, other] https://arxiv.org/abs/2406.00320
Subjects: Sound (cs.SD); Computer Vision and Pattern Recognition (cs.CV); Multimedia (cs.MM); Audio and Speech Processing (eess.AS)
Title: Frieren: Efficient Video-to-Audio Generation Network with Rectified Flow Matching
Authors: Yongqi Wang, Wenxiang Guo, Rongjie Huang, Jiawei Huang, Zehan Wang, Fuming You, Ruiqi Li, Zhou Zhao
Abstract: Video-to-audio (V2A) generation aims to synthesize content-matching audio from silent video, and it remains challenging to build V2A models with high generation quality, efficiency, and visual-audio temporal synchrony. We propose Frieren, a V2A model based on rectified flow matching. Frieren regresses the conditional transport vector field from noise to spectrogram latent with straight paths and conducts sampling by solving an ODE, outperforming autoregressive and score-based models in terms of audio quality. By employing a non-autoregressive vector field estimator based on a feed-forward transformer and channel-level cross-modal feature fusion with strong temporal alignment, our model generates audio that is highly synchronized with the input video. Furthermore, through reflow and one-step distillation with a guided vector field, our model can generate decent audio in a few sampling steps, or even only one. Experiments indicate that Frieren achieves state-of-the-art performance in both generation quality and temporal alignment on VGGSound, with alignment accuracy reaching 97.22% and a 6.2% improvement in inception score over the strong diffusion-based baseline. Audio samples are available at http://frieren-v2a.github.io.
Submitted 26 October, 2024; v1 submitted 1 June, 2024; originally announced June 2024.
Comments: Accepted by NeurIPS 2024
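Rectified flow matching trains a vector field with (near-)straight transport paths, so sampling reduces to integrating a simple ODE from noise to the data latent; few, or even single, Euler steps become viable after reflow and distillation. A minimal sketch with an illustrative stand-in field (not the paper's V2A network):

```python
import torch

@torch.no_grad()
def rf_sample(v_theta, x0: torch.Tensor, n_steps: int = 25) -> torch.Tensor:
    """Euler ODE sampling for rectified flow: dx/dt = v_theta(x, t), t from 0 to 1.

    v_theta: callable (x, t) -> vector field; x0: Gaussian noise latent.
    With straight paths, even n_steps=1 can be usable after reflow/distillation.
    """
    x, dt = x0, 1.0 / n_steps
    for i in range(n_steps):
        t = torch.full((x.shape[0],), i * dt, device=x.device)
        x = x + dt * v_theta(x, t)
    return x

# Toy field that transports noise toward zero, standing in for the trained network.
toy_field = lambda x, t: -x
latent = rf_sample(toy_field, torch.randn(2, 8, 16, 16), n_steps=25)
print(latent.abs().mean())
```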
arXiv:2405.19665 [pdf] https://arxiv.org/abs/2405.19665
Subjects: Systems and Control (eess.SY); Artificial Intelligence (cs.AI); Machine Learning (cs.LG)
Title: A novel fault localization with data refinement for hydroelectric units
Authors: Jialong Huang, Junlin Song, Penglong Lian, Mengjie Gan, Zhiheng Su, Benhao Wang, Wenji Zhu, Xiaomin Pu, Jianxiao Zou, Shicai Fan
Abstract: Due to the scarcity of fault samples and the complexity of the non-linear and non-smooth characteristics of hydroelectric unit data, most traditional fault localization methods for hydroelectric units struggle to achieve accurate localization. To address these problems, a sparse autoencoder (SAE)-generative adversarial network (GAN)-wavelet noise reduction (WNR)-manifold-boosted deep learning (SG-WMBDL) based fault localization method for hydroelectric units is proposed. To overcome the data scarcity, an SAE is embedded into the GAN to generate more high-quality samples in the data generation module. Considering that the signals involve non-linear and non-smooth characteristics, an improved WNR combining both soft and hard thresholding, together with local linear embedding (LLE), is utilized in the data preprocessing module to reduce the noise and effectively capture the local features. In addition, to seek higher performance, a novel Adaptive Boost (AdaBoost) scheme combined with multiple deep learners is proposed to achieve accurate fault localization. The experimental results show that the SG-WMBDL can locate faults for hydroelectric units with a small number of fault samples exhibiting non-linear and non-smooth characteristics, at higher precision and accuracy compared to other frontier methods, which verifies the effectiveness and practicality of the proposed method.
Submitted 29 May, 2024; originally announced May 2024.
Comments: 6 pages, 4 figures, Conference on Decision and Control (CDC)
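The "improved WNR combining both soft and hard thresholding" can be sketched with PyWavelets: threshold each detail band both ways and blend the results. The blend weight, wavelet, and universal threshold below are illustrative choices, not the paper's exact scheme:

```python
import numpy as np
import pywt

def wavelet_denoise(signal: np.ndarray, wavelet: str = "db4", level: int = 4,
                    soft_hard_mix: float = 0.5) -> np.ndarray:
    """Wavelet noise reduction blending soft and hard thresholding per band."""
    coeffs = pywt.wavedec(signal, wavelet, level=level)
    sigma = np.median(np.abs(coeffs[-1])) / 0.6745      # noise estimate, finest band
    thr = sigma * np.sqrt(2 * np.log(len(signal)))      # universal threshold
    out = [coeffs[0]]
    for c in coeffs[1:]:
        soft = pywt.threshold(c, thr, mode="soft")
        hard = pywt.threshold(c, thr, mode="hard")
        out.append(soft_hard_mix * soft + (1 - soft_hard_mix) * hard)
    return pywt.waverec(out, wavelet)[: len(signal)]

t = np.linspace(0, 1, 1024)
noisy = np.sin(2 * np.pi * 5 * t) + 0.3 * np.random.default_rng(0).standard_normal(1024)
print(np.std(noisy - wavelet_denoise(noisy)))  # residual is mostly the removed noise
```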
arXiv:2405.17659 [pdf, other] https://arxiv.org/abs/2405.17659
Subjects: Image and Video Processing (eess.IV); Computer Vision and Pattern Recognition (cs.CV)
Title: Enhancing Global Sensitivity and Uncertainty Quantification in Medical Image Reconstruction with Monte Carlo Arbitrary-Masked Mamba
Authors: Jiahao Huang, Liutao Yang, Fanwen Wang, Yang Nan, Weiwen Wu, Chengyan Wang, Kuangyu Shi, Angelica I. Aviles-Rivero, Carola-Bibiane Schönlieb, Daoqiang Zhang, Guang Yang
Abstract: Deep learning has been extensively applied in medical image reconstruction, where Convolutional Neural Networks (CNNs) and Vision Transformers (ViTs) represent the predominant paradigms, each possessing distinct advantages and inherent limitations: CNNs exhibit linear complexity with local sensitivity, whereas ViTs demonstrate quadratic complexity with global sensitivity. The emerging Mamba has shown superiority in learning visual representation, combining the advantages of linear scalability and global sensitivity.
In this study, we introduce MambaMIR, an Arbitrary-Masked Mamba-based model with wavelet decomposition for joint medical image reconstruction and uncertainty estimation. A novel Arbitrary Scan Masking (ASM) mechanism "masks out" redundant information to introduce randomness for further uncertainty estimation. Compared to the commonly used Monte Carlo (MC) dropout, our proposed MC-ASM provides an uncertainty map without the need for hyperparameter tuning and mitigates the performance drop typically observed when applying dropout to low-level tasks. For further texture preservation and better perceptual quality, we incorporate the wavelet transformation into MambaMIR and explore its variant based on the Generative Adversarial Network, namely MambaMIR-GAN. Comprehensive experiments have been conducted for multiple representative medical image reconstruction tasks, demonstrating that the proposed MambaMIR and MambaMIR-GAN outperform other baseline and state-of-the-art methods across reconstruction tasks, where MambaMIR achieves the best reconstruction fidelity and MambaMIR-GAN the best perceptual quality. In addition, our MC-ASM provides uncertainty maps as an additional tool for clinicians, while mitigating the typical performance drop caused by the commonly used dropout.
Submitted 25 June, 2024; v1 submitted 27 May, 2024; originally announced May 2024.
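The MC-ASM idea, randomizing which parts of the input are masked and reading per-pixel variability across repeated forward passes as uncertainty, can be sketched generically. The masking below is plain random pixel dropout rather than the paper's arbitrary-scan masks, and the model is a stand-in:

```python
import torch

@torch.no_grad()
def mc_masked_uncertainty(model, x: torch.Tensor, n_samples: int = 8,
                          mask_ratio: float = 0.25):
    """Monte Carlo uncertainty via random input masking.

    Runs the reconstructor several times with different random masks and
    returns the mean reconstruction plus a per-pixel std map.
    """
    outs = []
    for _ in range(n_samples):
        keep = (torch.rand_like(x) > mask_ratio).float()  # random binary mask
        outs.append(model(x * keep))
    outs = torch.stack(outs)
    return outs.mean(0), outs.std(0)  # reconstruction, uncertainty map

toy_model = torch.nn.Conv2d(1, 1, 3, padding=1)  # stand-in reconstructor
recon, unc = mc_masked_uncertainty(toy_model.eval(), torch.randn(1, 1, 32, 32))
print(unc.mean())
```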
arXiv:2405.12847 [pdf, other] https://arxiv.org/abs/2405.12847
Subjects: Information Retrieval (cs.IR); Machine Learning (cs.LG); Multimedia (cs.MM); Sound (cs.SD); Audio and Speech Processing (eess.AS)
DOI: 10.5281/zenodo.10265251
Title: A Dataset and Baselines for Measuring and Predicting the Music Piece Memorability
Authors: Li-Yang Tseng, Tzu-Ling Lin, Hong-Han Shuai, Jen-Wei Huang, Wen-Whei Chang
Abstract: Nowadays, humans are constantly exposed to music, whether through voluntary streaming services or incidental encounters during commercial breaks. Despite the abundance of music, certain pieces remain more memorable and often gain greater popularity. Inspired by this phenomenon, we focus on measuring and predicting music memorability. To achieve this, we collect a new music piece dataset with reliable memorability labels using a novel interactive experimental procedure. We then train baselines to predict and analyze music memorability, leveraging both interpretable features and audio mel-spectrograms as inputs. To the best of our knowledge, we are the first to explore music memorability using data-driven deep learning-based methods. Through a series of experiments and ablation studies, we demonstrate that while there is room for improvement, predicting music memorability with limited data is possible. Certain intrinsic elements, such as higher valence, arousal, and faster tempo, contribute to memorable music. As prediction techniques continue to evolve, real-life applications like music recommendation systems and music style transfer will undoubtedly benefit from this new area of research.
Submitted 21 May, 2024; originally announced May 2024.
Journal ref: Proceedings of the 24th International Society for Music Information Retrieval Conference, 174-181. Milan, Italy, November 5-9, 2023
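A baseline of the kind described, pooled log-mel-spectrogram features feeding a simple regressor, can be sketched as follows; the clips and memorability scores are random stand-ins, not the released dataset:

```python
import numpy as np
import librosa
from sklearn.linear_model import Ridge

def mel_features(y: np.ndarray, sr: int = 22050, n_mels: int = 64) -> np.ndarray:
    """Pool a log-mel spectrogram into a fixed-length clip descriptor."""
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
    logS = librosa.power_to_db(S)
    # Mean and std over time give a 2*n_mels-dimensional summary.
    return np.concatenate([logS.mean(axis=1), logS.std(axis=1)])

# Hypothetical toy data: random 1 s clips with random memorability scores.
rng = np.random.default_rng(0)
X = np.stack([mel_features(rng.standard_normal(22050)) for _ in range(16)])
scores = rng.random(16)                 # stand-in memorability labels
model = Ridge(alpha=1.0).fit(X, scores)
print(model.score(X, scores))
```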
arXiv:2405.10691 [pdf, other] https://arxiv.org/abs/2405.10691
Subjects: Image and Video Processing (eess.IV); Computer Vision and Pattern Recognition (cs.CV)
Title: LoCI-DiffCom: Longitudinal Consistency-Informed Diffusion Model for 3D Infant Brain Image Completion
Authors: Zihao Zhu, Tianli Tao, Yitian Tao, Haowen Deng, Xinyi Cai, Gaofeng Wu, Kaidong Wang, Haifeng Tang, Lixuan Zhu, Zhuoyang Gu, Jiawei Huang, Dinggang Shen, Han Zhang
style="white-space: nowrap;" onclick="document.getElementById('2405.10691v1-abstract-full').style.display = 'inline'; document.getElementById('2405.10691v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.10691v1-abstract-full" style="display: none;"> The infant brain undergoes rapid development in the first few years after birth.Compared to cross-sectional studies, longitudinal studies can depict the trajectories of infants brain development with higher accuracy, statistical power and flexibility.However, the collection of infant longitudinal magnetic resonance (MR) data suffers a notorious dropout problem, resulting in incomplete datasets with missing time points. This limitation significantly impedes subsequent neuroscience and clinical modeling. Yet, existing deep generative models are facing difficulties in missing brain image completion, due to sparse data and the nonlinear, dramatic contrast/geometric variations in the developing brain. We propose LoCI-DiffCom, a novel Longitudinal Consistency-Informed Diffusion model for infant brain image Completion,which integrates the images from preceding and subsequent time points to guide a diffusion model for generating high-fidelity missing data. Our designed LoCI module can work on highly sparse sequences, relying solely on data from two temporal points. Despite wide separation and diversity between age time points, our approach can extract individualized developmental features while ensuring context-aware consistency. Our experiments on a large infant brain MR dataset demonstrate its effectiveness with consistent performance on missing infant brain MR completion even in big gap scenarios, aiding in better delineation of early developmental trajectories. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.10691v1-abstract-full').style.display = 'none'; document.getElementById('2405.10691v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. 
arXiv:2405.10561 [pdf, other] https://arxiv.org/abs/2405.10561
Subjects: Image and Video Processing (eess.IV); Computer Vision and Pattern Recognition (cs.CV)
Title: Infrared Image Super-Resolution via Lightweight Information Split Network
Authors: Shijie Liu, Kang Yan, Feiwei Qin, Changmiao Wang, Ruiquan Ge, Kai Zhang, Jie Huang, Yong Peng, Jin Cao
Abstract: Single image super-resolution (SR) is an established pixel-level vision task aimed at reconstructing a high-resolution image from its degraded low-resolution counterpart. Despite the notable advancements achieved by leveraging deep neural networks for SR, most existing deep learning architectures feature an extensive number of layers, leading to high computational complexity and substantial memory demands. These issues become particularly pronounced in the context of infrared image SR, where infrared devices often have stringent storage and computational constraints. To mitigate these challenges, we introduce a novel, efficient, and precise single infrared image SR model, termed the Lightweight Information Split Network (LISN). The LISN comprises four main components: shallow feature extraction, deep feature extraction, dense feature fusion, and high-resolution infrared image reconstruction. A key innovation within this model is the introduction of the Lightweight Information Split Block (LISB) for deep feature extraction. The LISB employs a sequential process to extract hierarchical features, which are then aggregated based on the relevance of the features under consideration. By integrating channel splitting and shift operations, the LISB successfully strikes an optimal balance between enhanced SR performance and a lightweight framework. Comprehensive experimental evaluations reveal that the proposed LISN achieves superior performance over contemporary state-of-the-art methods in terms of both SR quality and model complexity, affirming its efficacy for practical deployment in resource-constrained infrared imaging applications.
Submitted 27 May, 2024; v1 submitted 17 May, 2024; originally announced May 2024.
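Channel splitting and shift operations can be illustrated with a toy PyTorch block: half of the channels take a cheap shift path while the other half pass through a small convolution, and the halves are re-concatenated. This is an illustrative sketch inspired by the LISB description, not the paper's exact block:

```python
import torch
import torch.nn as nn

class SplitShiftBlock(nn.Module):
    """Toy channel-split + spatial-shift block (illustrative, not the paper's LISB)."""

    def __init__(self, channels: int):
        super().__init__()
        self.conv = nn.Conv2d(channels // 2, channels // 2, 3, padding=1)
        self.act = nn.ReLU(inplace=True)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        a, b = torch.chunk(x, 2, dim=1)        # channel split
        b = torch.roll(b, shifts=1, dims=-1)   # near-zero-cost spatial shift
        return torch.cat([a, self.act(self.conv(b))], dim=1)

x = torch.randn(1, 16, 24, 24)
print(SplitShiftBlock(16)(x).shape)  # shape is preserved: (1, 16, 24, 24)
```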
arXiv:2405.04476 [pdf, other] https://arxiv.org/abs/2405.04476
Subjects: Audio and Speech Processing (eess.AS); Sound (cs.SD)
Title: BERP: A Blind Estimator of Room Acoustic and Physical Parameters for Single-Channel Noisy Speech Signals
Authors: Lijun Wang, Yixian Lu, Ziyan Gao, Kai Li, Jianqiang Huang, Yuntao Kong, Shogo Okada
Abstract: Room acoustic parameters (RAPs) and room physical parameters (RPPs) are essential metrics for parameterizing the room acoustical characteristics (RACs) of a sound field around a listener's local environment, offering comprehensive indications for various applications. Current RAP and RPP estimation methods either fall short of covering broad real-world acoustic environments in the context of real background noise or lack universal frameworks for blindly estimating RAPs and RPPs from noisy single-channel speech signals, particularly sound source distances, direction of arrival (DOA) of sound sources, and occupancy levels. In this paper, we therefore propose a new universal blind estimation framework, the blind estimator of room acoustical and physical parameters (BERP), by introducing a new stochastic room impulse response (RIR) model, namely the sparse stochastic impulse response (SSIR) model, and endowing the BERP with a unified encoder and multiple separate predictors to estimate the RPPs and the parameters of the SSIR model in parallel. This estimation framework enables computationally efficient and universal estimation of room parameters using only noisy single-channel speech signals. Finally, all RAPs can be simultaneously derived from RIRs synthesized from the SSIR model with the estimated parameters. To evaluate the effectiveness of the proposed BERP and SSIR models, we compile a task-specific dataset from several publicly available datasets. The results reveal that BERP achieves state-of-the-art (SOTA) performance, and the evaluation results for the SSIR RIR model also demonstrate its efficacy. The code is available on GitHub.
Submitted 23 October, 2024; v1 submitted 7 May, 2024; originally announced May 2024.
Comments: 16 pages, erratum revision, submitted to IEEE/ACM Transactions on Audio, Speech and Language Processing (TASLP)

arXiv:2404.13804 [pdf, other] https://arxiv.org/abs/2404.13804
Subjects: Distributed, Parallel, and Cluster Computing (cs.DC); Machine Learning (cs.LG); Networking and Internet Architecture (cs.NI); Systems and Control (eess.SY)
Title: Adaptive Heterogeneous Client Sampling for Federated Learning over Wireless Networks
Authors: Bing Luo, Wenli Xiao, Shiqiang Wang, Jianwei Huang, Leandros Tassiulas
Abstract: Federated learning (FL) algorithms usually sample a fraction of clients in each round (partial participation) when the number of participants is large and the server's communication bandwidth is limited. Recent works on the convergence analysis of FL have focused on unbiased client sampling, e.g., sampling uniformly at random, which suffers from slow wall-clock convergence due to high degrees of system heterogeneity and statistical heterogeneity. This paper aims to design an adaptive client sampling algorithm for FL over wireless networks that tackles both system and statistical heterogeneity to minimize the wall-clock convergence time. We obtain a new tractable convergence bound for FL algorithms with arbitrary client sampling probabilities. Based on the bound, we analytically establish the relationship between the total learning time and the sampling probabilities under an adaptive bandwidth allocation scheme, which yields a non-convex optimization problem. We design an efficient algorithm for learning the unknown parameters in the convergence bound and develop a low-complexity algorithm to approximately solve the non-convex problem. Our solution reveals the impact of system and statistical heterogeneity parameters on the optimal client sampling design. Moreover, our solution shows that as the number of sampled clients increases, the total convergence time first decreases and then increases, because a larger sampling number reduces the number of rounds needed for convergence but lengthens the expected time per round due to limited wireless bandwidth. Experimental results from both a hardware prototype and simulation demonstrate that our proposed sampling scheme significantly reduces the convergence time compared to several baseline sampling schemes.
Submitted 21 April, 2024; originally announced April 2024.
Comments: Published in IEEE Transactions on Mobile Computing (TMC). arXiv admin note: substantial text overlap with arXiv:2112.11256
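The core mechanism, sampling clients with non-uniform probabilities while keeping the aggregated update unbiased via inverse-probability weights, can be sketched in a few lines. This is the generic importance-sampling skeleton, not the paper's optimized choice of probabilities:

```python
import numpy as np

def sample_and_weight(q: np.ndarray, n_sampled: int, rng):
    """Sample client indices with probabilities q and return unbiasing weights.

    With sampling-with-replacement, weights 1/(K * q_i) make
    sum_i w_i * update_i an unbiased estimator of the sum of all
    clients' updates (full participation).
    """
    idx = rng.choice(len(q), size=n_sampled, replace=True, p=q)
    weights = 1.0 / (n_sampled * q[idx])
    return idx, weights

rng = np.random.default_rng(0)
q = np.array([0.5, 0.2, 0.2, 0.1])  # e.g., favor fast or informative clients
idx, w = sample_and_weight(q, n_sampled=3, rng=rng)
print(idx, np.round(w, 2))
```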
arXiv:2404.13358 [pdf, other] https://arxiv.org/abs/2404.13358
Subjects: Sound (cs.SD); Artificial Intelligence (cs.AI); Audio and Speech Processing (eess.AS)
Title: Music Consistency Models
Authors: Zhengcong Fei, Mingyuan Fan, Junshi Huang
Abstract: Consistency models have exhibited remarkable capabilities in facilitating efficient image/video generation, enabling synthesis with minimal sampling steps, and have proven advantageous in mitigating the computational burdens associated with diffusion models. Nevertheless, the application of consistency models in music generation remains largely unexplored. To address this gap, we present Music Consistency Models (MusicCM), which leverage the concept of consistency models to efficiently synthesize mel-spectrograms for music clips, maintaining high quality while minimizing the number of sampling steps. Building upon existing text-to-music diffusion models, the MusicCM model incorporates consistency distillation and adversarial discriminator training. Moreover, we find it beneficial to generate extended coherent music by incorporating multiple diffusion processes with shared constraints. Experimental results reveal the effectiveness of our model in terms of computational efficiency, fidelity, and naturalness. Notably, MusicCM achieves seamless music synthesis with a mere four sampling steps, e.g., only one second per minute of music clip, showcasing its potential for real-time application.
Submitted 20 April, 2024; originally announced April 2024.
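A consistency model maps a noisy sample directly to a clean estimate at any noise level, so few-step generation alternates "denoise, then re-noise to a smaller level". A generic sketch of that multistep sampler with four steps, matching the count quoted above; the consistency function and noise levels are illustrative stand-ins:

```python
import torch

@torch.no_grad()
def consistency_sample(f_theta, shape, sigmas=(80.0, 24.0, 5.0, 0.5)):
    """Few-step consistency-model sampling (generic recipe).

    f_theta(x, sigma) maps a noisy sample directly to a clean estimate;
    between steps we re-noise to the next, smaller sigma.
    """
    x = torch.randn(shape) * sigmas[0]
    x0 = f_theta(x, sigmas[0])            # one-step generation already works
    for s in sigmas[1:]:
        x = x0 + s * torch.randn(shape)   # re-noise to the next level
        x0 = f_theta(x, s)                # denoise again
    return x0

toy_f = lambda x, s: x / (1.0 + s)        # stand-in consistency function
mel = consistency_sample(toy_f, (1, 80, 256))
print(mel.shape)
```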
arXiv:2404.11350 [pdf, other] https://arxiv.org/abs/2404.11350
Subjects: Machine Learning (cs.LG); Artificial Intelligence (cs.AI); Signal Processing (eess.SP)
Title: Calibrating Bayesian Learning via Regularization, Confidence Minimization, and Selective Inference
Authors: Jiayi Huang, Sangwoo Park, Osvaldo Simeone
Abstract: The application of artificial intelligence (AI) models in fields such as engineering is limited by the known difficulty of quantifying the reliability of an AI's decisions. A well-calibrated AI model must correctly report its accuracy on in-distribution (ID) inputs, while also enabling the detection of out-of-distribution (OOD) inputs. A conventional approach to improving calibration is the application of Bayesian ensembling. However, owing to computational limitations and model misspecification, practical ensembling strategies do not necessarily enhance calibration. This paper proposes an extension of variational inference (VI)-based Bayesian learning that integrates calibration regularization for improved ID performance, confidence minimization for OOD detection, and selective calibration to ensure a synergistic use of calibration regularization and confidence minimization. The scheme is constructed successively by first introducing calibration-regularized Bayesian learning (CBNN), then incorporating out-of-distribution confidence minimization (OCM) to yield CBNN-OCM, and finally integrating selective calibration to produce selective CBNN-OCM (SCBNN-OCM). Selective calibration rejects inputs for which the calibration performance is expected to be insufficient. Numerical results illustrate the trade-offs between ID accuracy, ID calibration, and OOD calibration attained by both frequentist and Bayesian learning methods. Among the main conclusions, SCBNN-OCM is seen to achieve the best ID and OOD performance compared to existing state-of-the-art approaches, at the cost of rejecting a sufficiently large number of inputs.
Submitted 17 April, 2024; originally announced April 2024.
Comments: Under review
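Two of the ingredients, calibration measurement and selective inference, are easy to illustrate: the standard binned expected calibration error (ECE), and a rule that abstains on low-confidence inputs. A minimal sketch on synthetic predictions; the threshold and data are illustrative, and this is not the paper's VI-based training:

```python
import numpy as np

def expected_calibration_error(conf: np.ndarray, correct: np.ndarray,
                               n_bins: int = 10) -> float:
    """Binned ECE: weighted |accuracy - confidence| gap over confidence bins."""
    bins = np.linspace(0.0, 1.0, n_bins + 1)
    ece = 0.0
    for lo, hi in zip(bins[:-1], bins[1:]):
        m = (conf > lo) & (conf <= hi)
        if m.any():
            ece += m.mean() * abs(correct[m].mean() - conf[m].mean())
    return ece

def selective_predict(conf: np.ndarray, threshold: float = 0.7) -> np.ndarray:
    """Selective inference: keep only inputs whose confidence clears the bar."""
    return conf >= threshold

rng = np.random.default_rng(0)
conf = rng.uniform(0.4, 1.0, 1000)
correct = (rng.random(1000) < conf).astype(float)  # roughly calibrated toy model
keep = selective_predict(conf)
print(round(expected_calibration_error(conf, correct), 3), keep.mean())
```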
arXiv:2403.07338 [pdf, ps, other] https://arxiv.org/abs/2403.07338
Subjects: Information Theory (cs.IT); Multimedia (cs.MM); Signal Processing (eess.SP)
Title: D$^2$-JSCC: Digital Deep Joint Source-channel Coding for Semantic Communications
Authors: Jianhao Huang, Kai Yuan, Chuan Huang, Kaibin Huang
Abstract: Semantic communications (SemCom) have emerged as a new paradigm for supporting sixth-generation applications, where semantic features of data are transmitted using artificial intelligence algorithms to attain high communication efficiency. Most existing SemCom techniques utilize deep neural networks (DNNs) to implement analog source-channel mappings, which are incompatible with existing digital communication architectures.
arXiv:2402.18856 (https://arxiv.org/abs/2402.18856) [pdf, other]
Subjects: eess.IV (Image and Video Processing); cs.CV (Computer Vision and Pattern Recognition)
DOI: 10.1109/ISBI56570.2024.10635408
Title: Anatomy-guided fiber trajectory distribution estimation for cranial nerves tractography
Authors: Lei Xie, Qingrun Zeng, Huajun Zhou, Guoqiang Xie, Mingchu Li, Jiahao Huang, Jianan Cui, Hao Chen, Yuanjing Feng
Abstract: Diffusion MRI tractography is an important tool for identifying and analyzing the intracranial course of cranial nerves (CNs). However, the complex environment of the skull base leads to ambiguous spatial correspondence between diffusion directions and fiber geometry, and existing diffusion tractography methods for CN identification are prone to producing erroneous trajectories and missing true positive connections. To overcome this challenge, we propose a novel CN identification framework with anatomy-guided fiber trajectory distribution, which incorporates anatomical shape prior knowledge during the process of CN tracing to build diffusion tensor vector fields. We introduce higher-order streamline differential equations for continuous flow field representations to directly characterize the fiber trajectory distribution of CNs at the tract-based level. Experimental results on the in vivo HCP dataset and the clinical MDM dataset demonstrate that the proposed method reduces false-positive fiber production compared to competing methods and produces reconstructed CNs (i.e., CN II, CN III, CN V, and CN VII/VIII) that are judged to better correspond to the known anatomy.
Submitted 29 February, 2024; originally announced February 2024.
arXiv:2402.18451 (https://arxiv.org/abs/2402.18451) [pdf, other]
Subjects: eess.IV (Image and Video Processing); cs.CV (Computer Vision and Pattern Recognition)
Title: MambaMIR: An Arbitrary-Masked Mamba for Joint Medical Image Reconstruction and Uncertainty Estimation
Authors: Jiahao Huang, Liutao Yang, Fanwen Wang, Yang Nan, Angelica I. Aviles-Rivero, Carola-Bibiane Schönlieb, Daoqiang Zhang, Guang Yang
Abstract: The recent Mamba model has shown remarkable adaptability for visual representation learning, including in medical imaging tasks. This study introduces MambaMIR, a Mamba-based model for medical image reconstruction, as well as its Generative Adversarial Network-based variant, MambaMIR-GAN. The proposed MambaMIR inherits several advantages, such as linear complexity, global receptive fields, and dynamic weights, from the original Mamba model. An innovative arbitrary-mask mechanism effectively adapts Mamba to the image reconstruction task, providing randomness for subsequent Monte Carlo-based uncertainty estimation. Experiments conducted on various medical image reconstruction tasks, including fast MRI and SVCT, covering anatomical regions such as the knee, chest, and abdomen, demonstrate that MambaMIR and MambaMIR-GAN achieve comparable or superior reconstruction results relative to state-of-the-art methods. Additionally, the estimated uncertainty maps offer further insights into the reliability of the reconstruction quality. The code is publicly available at https://github.com/ayanglab/MambaMIR.
Submitted 25 June, 2024; v1 submitted 28 February, 2024; originally announced February 2024.
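The Monte Carlo-based uncertainty estimation that the arbitrary-mask mechanism enables can be sketched as repeated reconstruction under different random masks, with pixel-wise mean and standard deviation read off at the end. `reconstruct` below is a naive stand-in for the trained network, and the keep ratio and sample count are assumptions:

```python
# Monte Carlo uncertainty from randomized masking (illustrative sketch).
import numpy as np

def reconstruct(masked_image, mask):
    """Placeholder reconstructor: in practice, the trained network."""
    filled = masked_image.copy()
    filled[~mask] = masked_image[mask].mean()   # naive inpainting stand-in
    return filled

def mc_uncertainty(image, n_samples=32, keep_ratio=0.85, seed=0):
    rng = np.random.default_rng(seed)
    outs = []
    for _ in range(n_samples):
        mask = rng.random(image.shape) < keep_ratio   # arbitrary random mask
        outs.append(reconstruct(np.where(mask, image, 0.0), mask))
    outs = np.stack(outs)
    return outs.mean(axis=0), outs.std(axis=0)  # reconstruction + uncertainty map

img = np.random.default_rng(1).random((64, 64))
mean_img, unc_map = mc_uncertainty(img)
print(mean_img.shape, float(unc_map.mean()))
```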
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.18451v3-abstract-full').style.display = 'none'; document.getElementById('2402.18451v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.13776">arXiv:2402.13776</a> <span> [<a href="https://arxiv.org/pdf/2402.13776">pdf</a>, <a href="https://arxiv.org/format/2402.13776">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Cas-DiffCom: Cascaded diffusion model for infant longitudinal super-resolution 3D medical image completion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Guo%2C+L">Lianghu Guo</a>, <a href="/search/eess?searchtype=author&query=Tao%2C+T">Tianli Tao</a>, <a href="/search/eess?searchtype=author&query=Cai%2C+X">Xinyi Cai</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+Z">Zihao Zhu</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+J">Jiawei Huang</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+L">Lixuan Zhu</a>, <a href="/search/eess?searchtype=author&query=Gu%2C+Z">Zhuoyang Gu</a>, <a href="/search/eess?searchtype=author&query=Tang%2C+H">Haifeng Tang</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+R">Rui Zhou</a>, <a href="/search/eess?searchtype=author&query=Han%2C+S">Siyan Han</a>, <a href="/search/eess?searchtype=author&query=Liang%2C+Y">Yan Liang</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+Q">Qing Yang</a>, <a href="/search/eess?searchtype=author&query=Shen%2C+D">Dinggang Shen</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+H">Han Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.13776v1-abstract-short" style="display: inline;"> Early infancy is a rapid and dynamic neurodevelopmental period for behavior and neurocognition. Longitudinal magnetic resonance imaging (MRI) is an effective tool to investigate such a crucial stage by capturing the developmental trajectories of the brain structures. However, longitudinal MRI acquisition always meets a serious data-missing problem due to participant dropout and failed scans, makin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.13776v1-abstract-full').style.display = 'inline'; document.getElementById('2402.13776v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.13776v1-abstract-full" style="display: none;"> Early infancy is a rapid and dynamic neurodevelopmental period for behavior and neurocognition. 
arXiv:2402.11898 (https://arxiv.org/abs/2402.11898) [pdf, other]
Subjects: eess.SP (Signal Processing)
Title: Automatic Radio Map Adaptation for Robust Localization with Dynamic Adversarial Learning
Authors: Lingyan Zhang, Junlin Huang, Tingting Zhang, Qinyu Zhang
Abstract: Wireless fingerprint-based localization has become one of the most promising technologies for ubiquitous location-aware computing and intelligent location-based services. However, because RF signals are vulnerable to environmental dynamics over time, continuous radio map updates are time-consuming and infeasible, resulting in severe accuracy degradation. To address this issue, we propose a novel approach to robust localization with dynamic adversarial learning, called DadLoc, which realizes automatic radio map adaptation by incorporating multiple robust factors underlying RF fingerprints to learn an evolving feature representation under complicated environmental dynamics. DadLoc performs finer-grained distribution adaptation with the developed dynamic adversarial adaptation network and quantifies the contributions of both global and local distribution adaptation in a dynamics-adaptive manner. Furthermore, we adopt a prediction-uncertainty-suppression strategy for source-supervised training, target-unsupervised training, and source-target dynamic adversarial adaptation, which trades off the environment adaptability and the location discriminability of the learned deep representation for safe and effective feature transfer across different environments. Extensive experimental results show satisfactory accuracy over other comparative schemes, demonstrating that the proposed DadLoc can facilitate fingerprint-based localization for wide deployments.
Submitted 19 February, 2024; originally announced February 2024.
Comments: 11 pages, 11 figures
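Dynamic adversarial adaptation of this family typically rests on a gradient-reversal layer (as in DANN): features are trained to fool a domain discriminator, which is what lets a radio map adapt across environmental change. A minimal PyTorch sketch of just that mechanism, not DadLoc's global/local weighting scheme:

```python
# Gradient-reversal layer: identity forward, sign-flipped gradient backward.
import torch
from torch.autograd import Function

class GradReverse(Function):
    @staticmethod
    def forward(ctx, x, lam):
        ctx.lam = lam
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_out):
        return -ctx.lam * grad_out, None   # flip gradient toward the encoder

def grad_reverse(x, lam=1.0):
    return GradReverse.apply(x, lam)

feat = torch.randn(8, 32, requires_grad=True)
domain_logit = grad_reverse(feat, lam=0.5).sum()   # stand-in discriminator
domain_logit.backward()
print(feat.grad[0, :4])   # gradients arrive sign-flipped and scaled by lam
```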
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 11 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.04267">arXiv:2402.04267</a> <span> [<a href="https://arxiv.org/pdf/2402.04267">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Medical Physics">physics.med-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.54097/LAwfJzEA">10.54097/LAwfJzEA <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Application analysis of ai technology combined with spiral CT scanning in early lung cancer screening </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+S">Shulin Li</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+L">Liqiang Yu</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+B">Bo Liu</a>, <a href="/search/eess?searchtype=author&query=Lin%2C+Q">Qunwei Lin</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+J">Jiaxin Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.04267v1-abstract-short" style="display: inline;"> At present, the incidence and fatality rate of lung cancer in China rank first among all malignant tumors. Despite the continuous development and improvement of China's medical level, the overall 5-year survival rate of lung cancer patients is still lower than 20% and is staged. A number of studies have confirmed that early diagnosis and treatment of early stage lung cancer is of great significanc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.04267v1-abstract-full').style.display = 'inline'; document.getElementById('2402.04267v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.04267v1-abstract-full" style="display: none;"> At present, the incidence and fatality rate of lung cancer in China rank first among all malignant tumors. Despite the continuous development and improvement of China's medical level, the overall 5-year survival rate of lung cancer patients is still lower than 20% and is staged. A number of studies have confirmed that early diagnosis and treatment of early stage lung cancer is of great significance to improve the prognosis of patients. In recent years, artificial intelligence technology has gradually begun to be applied in oncology. ai is used in cancer screening, clinical diagnosis, radiation therapy (image acquisition, at-risk organ segmentation, image calibration and delivery) and other aspects of rapid development. 
arXiv:2401.16564 (https://arxiv.org/abs/2401.16564) [pdf]
Subjects: eess.SP (Signal Processing)
DOI: 10.1109/RBME.2024.3485022
Title: Data and Physics driven Deep Learning Models for Fast MRI Reconstruction: Fundamentals and Methodologies
Authors: Jiahao Huang, Yinzhe Wu, Fanwen Wang, Yingying Fang, Yang Nan, Cagan Alkan, Daniel Abraham, Congyu Liao, Lei Xu, Zhifan Gao, Weiwen Wu, Lei Zhu, Zhaolin Chen, Peter Lally, Neal Bangerter, Kawin Setsompop, Yike Guo, Daniel Rueckert, Ge Wang, Guang Yang
Abstract: Magnetic Resonance Imaging (MRI) is a pivotal clinical diagnostic tool, yet its extended scanning times often compromise patient comfort and image quality, especially in volumetric, temporal, and quantitative scans. This review elucidates recent advances in MRI acceleration via data- and physics-driven models, covering techniques from algorithm unrolling models, enhancement-based methods, and plug-and-play models to the emerging full spectrum of generative model-based methods. We also explore the synergistic integration of data models with physics-based insights, encompassing advancements in multi-coil hardware accelerations such as parallel imaging and simultaneous multi-slice imaging, and the optimization of sampling patterns. We then focus on domain-specific challenges and opportunities, including image redundancy exploitation, image integrity, evaluation metrics, data heterogeneity, and model generalization. This work also discusses potential solutions and future research directions, with an emphasis on the role of data harmonization and federated learning in further improving the general applicability and performance of these methods in MRI reconstruction.
Submitted 21 October, 2024; v1 submitted 29 January, 2024; originally announced January 2024.
Comments: Accepted by IEEE Reviews in Biomedical Engineering (RBME)
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by IEEE Reviews in Biomedical Engineering (RBME)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.13220">arXiv:2401.13220</a> <span> [<a href="https://arxiv.org/pdf/2401.13220">pdf</a>, <a href="https://arxiv.org/format/2401.13220">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Segment Any Cell: A SAM-based Auto-prompting Fine-tuning Framework for Nuclei Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Na%2C+S">Saiyang Na</a>, <a href="/search/eess?searchtype=author&query=Guo%2C+Y">Yuzhi Guo</a>, <a href="/search/eess?searchtype=author&query=Jiang%2C+F">Feng Jiang</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+H">Hehuan Ma</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+J">Junzhou Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.13220v1-abstract-short" style="display: inline;"> In the rapidly evolving field of AI research, foundational models like BERT and GPT have significantly advanced language and vision tasks. The advent of pretrain-prompting models such as ChatGPT and Segmentation Anything Model (SAM) has further revolutionized image segmentation. However, their applications in specialized areas, particularly in nuclei segmentation within medical imaging, reveal a k… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.13220v1-abstract-full').style.display = 'inline'; document.getElementById('2401.13220v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.13220v1-abstract-full" style="display: none;"> In the rapidly evolving field of AI research, foundational models like BERT and GPT have significantly advanced language and vision tasks. The advent of pretrain-prompting models such as ChatGPT and Segmentation Anything Model (SAM) has further revolutionized image segmentation. However, their applications in specialized areas, particularly in nuclei segmentation within medical imaging, reveal a key challenge: the generation of high-quality, informative prompts is as crucial as applying state-of-the-art (SOTA) fine-tuning techniques on foundation models. To address this, we introduce Segment Any Cell (SAC), an innovative framework that enhances SAM specifically for nuclei segmentation. SAC integrates a Low-Rank Adaptation (LoRA) within the attention layer of the Transformer to improve the fine-tuning process, outperforming existing SOTA methods. It also introduces an innovative auto-prompt generator that produces effective prompts to guide segmentation, a critical factor in handling the complexities of nuclei segmentation in biomedical imaging. Our extensive experiments demonstrate the superiority of SAC in nuclei segmentation tasks, proving its effectiveness as a tool for pathologists and researchers. 
arXiv:2401.08935 (https://arxiv.org/abs/2401.08935) [pdf, other]
Subjects: eess.SP (Signal Processing)
Title: Privacy Protected Contactless Cardio-respiratory Monitoring using Defocused Cameras during Sleep
Authors: Yingen Zhu, Jia Huang, Hongzhou Lu, Wenjin Wang
Abstract: The monitoring of vital signs such as heart rate (HR) and respiratory rate (RR) during sleep is important for the assessment of sleep quality and the detection of sleep disorders. Camera-based HR and RR monitoring has gained popularity in sleep monitoring in recent years, but it faces serious privacy issues when a video camera is used in the sleeping scenario. In this paper, we propose using a defocused camera to measure vital signs from optically blurred images, which can fundamentally eliminate the privacy invasion, as faces are difficult to identify in the obtained blurry images. A spatial-redundant framework involving living-skin detection is used to extract HR and RR from the defocused camera in NIR, and a motion metric is designed to exclude outliers caused by body motions. In the benchmark, the overall Mean Absolute Error (MAE) is 4.4 bpm for HR measurement and 5.9 bpm for RR measurement. Both show quality drops compared to measurement with a focused camera, but the degradation in HR is much smaller, i.e., HR measurement retains a strong correlation with the reference ($R \geq 0.90$). Preliminary experiments suggest that it is feasible to use a defocused camera for cardio-respiratory monitoring while protecting privacy. Further improvement is needed for robust RR measurement, such as PPG-modulation-based RR extraction.
Submitted 16 January, 2024; originally announced January 2024.
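The motion-gating idea can be sketched as discarding analysis windows whose motion metric exceeds a threshold before aggregating per-window HR estimates. The frame-difference metric and the threshold below are assumptions for illustration; the paper designs its own metric:

```python
# Motion-gated aggregation of per-window heart-rate estimates.
import numpy as np

def motion_metric(frames):
    """Mean absolute frame difference over a window of blurry NIR frames."""
    return np.abs(np.diff(frames, axis=0)).mean()

def gated_hr(windows_frames, windows_hr, thresh=0.05):
    keep = [hr for f, hr in zip(windows_frames, windows_hr)
            if motion_metric(f) < thresh]          # drop high-motion windows
    return float(np.mean(keep)) if keep else float("nan")

rng = np.random.default_rng(0)
still = [rng.random((10, 8, 8)) * 0.01 + 0.5 for _ in range(3)]   # low motion
moving = [rng.random((10, 8, 8)) for _ in range(2)]               # high motion
print(gated_hr(still + moving, [62.0, 63.0, 61.5, 110.0, 30.0]))  # ~62.2
```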
arXiv:2401.08921 (https://arxiv.org/abs/2401.08921) [pdf, other]
Subjects: cs.IT (Information Theory); eess.SP (Signal Processing); eess.SY (Systems and Control)
Title: Electromagnetic Information Theory: Fundamentals and Applications for 6G Wireless Communication Systems
Authors: Cheng-Xiang Wang, Yue Yang, Jie Huang, Xiqi Gao, Tie Jun Cui, Lajos Hanzo
Abstract: In wireless communications, electromagnetic theory and information theory constitute a pair of fundamental theories, bridged by antenna theory and wireless propagation channel modeling theory. Up to the fifth generation (5G) of wireless communication networks, these four theories developed relatively independently. However, in sixth generation (6G) space-air-ground-sea wireless communication networks, seamless coverage is expected in three-dimensional (3D) space, potentially necessitating the acquisition of channel state information (CSI) and the calculation of channel capacity anywhere and at any time. Additionally, key 6G technologies such as ultra-massive multiple-input multiple-output (MIMO) and holographic MIMO create intricate interactions between the antennas and the wireless propagation environment, which necessitates the joint modeling of antennas and wireless propagation channels. To address these challenges in 6G, the integration of the above four theories becomes inevitable, leading to the concept of the so-called electromagnetic information theory (EIT). In this article, a suite of key 6G technologies is highlighted. Then, the concepts and relationships of the four theories are unveiled. Finally, the necessity and benefits of integrating them into EIT are revealed.
Submitted 16 January, 2024; originally announced January 2024.
arXiv:2401.05606 (https://arxiv.org/abs/2401.05606) [pdf]
Subjects: eess.SP (Signal Processing)
Title: Weiss-Weinstein bound of frequency estimation error for very weak GNSS signals
Authors: Xin Zhang, Xingqun Zhan, Jihong Huang, Jiahui Liu, Yingchao Xiao
Abstract: Tightness remains the central quest in all modern estimation bounds. For very weak signals, this is made possible with judicious choices of prior probability distribution and bound family. While current bounds in GNSS assess the performance of carrier frequency estimators under Gaussian or uniform assumptions, the circular nature of frequency is overlooked. In addition, of all bounds in the Bayesian framework, the Weiss-Weinstein bound (WWB) stands out since it is free from regularity conditions or requirements on the prior distribution. Therefore, the WWB is extended for the current frequency estimation problem. A divide-and-conquer type of hyperparameter tuning method is developed to mitigate the computational complexity of the WWB family while enhancing tightness. Synthetic results show that with a von Mises prior probability distribution, the WWB provides a bound up to 22.5% tighter than the Ziv-Zakaï bound (ZZB) when the SNR varies between -3.5 dB and -20 dB, where the GNSS signal is deemed extremely weak.
Submitted 10 January, 2024; originally announced January 2024.
Comments: 35 pages, 13 figures, submitted to NAVIGATION, Journal of the Institute of Navigation
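The estimation setting can be illustrated with a small Monte Carlo study: draw the carrier frequency from a circular von Mises prior, observe a noisy complex sinusoid at very low SNR, and estimate by FFT peak. Computing the WWB/ZZB themselves is beyond this sketch; N, kappa, and the trial count are illustrative:

```python
# Monte Carlo RMSE of an FFT-peak frequency estimator under a von Mises prior.
import numpy as np

rng = np.random.default_rng(0)
N, KAPPA, TRIALS = 1024, 2.0, 500

def trial(snr_db):
    f = rng.vonmises(0.0, KAPPA) / (2 * np.pi)      # cycles/sample in (-0.5, 0.5)
    n = np.arange(N)
    sig = np.exp(2j * np.pi * f * n)
    noise_std = 10 ** (-snr_db / 20) / np.sqrt(2)
    y = sig + noise_std * (rng.standard_normal(N) + 1j * rng.standard_normal(N))
    f_hat = np.fft.fftfreq(N)[np.argmax(np.abs(np.fft.fft(y)))]
    err = np.angle(np.exp(2j * np.pi * (f_hat - f)))  # circular (wrapped) error
    return err ** 2

for snr in (-3.5, -10, -20):
    rmse = np.sqrt(np.mean([trial(snr) for _ in range(TRIALS)]))
    print(f"SNR {snr:6.1f} dB -> circular RMSE {rmse:.4f} rad")
```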
arXiv:2401.01176 (https://arxiv.org/abs/2401.01176) [pdf, other]
Subjects: cs.IT (Information Theory); cs.LG (Machine Learning); eess.SP (Signal Processing)
Title: Fundamental Limitation of Semantic Communications: Neural Estimation for Rate-Distortion
Authors: Dongxu Li, Jianhao Huang, Chuan Huang, Xiaoqi Qin, Han Zhang, Ping Zhang
Abstract: This paper studies the fundamental limit of semantic communications over the discrete memoryless channel. We consider the scenario of sending a semantic source consisting of an observation state and its corresponding semantic state, both of which are recovered at the receiver. To derive the performance limitation, we adopt the semantic rate-distortion function (SRDF) to study the relationship among the minimum compression rate, observation distortion, semantic distortion, and channel capacity. For the case with unknown semantic source distribution, where only a set of source samples is available, we propose a neural-network-based method that leverages generative networks to learn the semantic source distribution. Furthermore, for a special case where the semantic state is a deterministic function of the observation, we design a cascade neural network to estimate the SRDF. For the case with perfectly known semantic source distribution, we propose a general Blahut-Arimoto algorithm to effectively compute the SRDF. Finally, experimental results validate the proposed algorithms for scenarios with an ideal Gaussian semantic source and some practical datasets.
Submitted 2 January, 2024; originally announced January 2024.
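The Blahut-Arimoto iteration referenced above, shown here in its standard single-distortion rate-distortion form on a toy binary source (the semantic, two-distortion generalization is not reproduced):

```python
# Blahut-Arimoto: trace one point of R(D) at Lagrange multiplier beta.
import numpy as np

def blahut_arimoto_rd(p_x, dist, beta, iters=200):
    """p_x: source pmf; dist[i, j] = d(x_i, x_hat_j); larger beta -> lower D."""
    q = np.full(dist.shape[1], 1.0 / dist.shape[1])   # output marginal
    for _ in range(iters):
        w = q * np.exp(-beta * dist)                  # unnormalized p(x_hat | x)
        w /= w.sum(axis=1, keepdims=True)
        q = p_x @ w                                   # re-estimate the marginal
    d_avg = float((p_x[:, None] * w * dist).sum())
    rate = float((p_x[:, None] * w *
                  np.log2(np.maximum(w / q, 1e-12))).sum())
    return rate, d_avg

p_x = np.array([0.5, 0.5])
hamming = 1.0 - np.eye(2)
for beta in (1.0, 3.0, 8.0):
    r, d = blahut_arimoto_rd(p_x, hamming, beta)
    print(f"beta={beta:4.1f} -> R={r:.3f} bits, D={d:.3f}")   # approaches 1 - H(D)
```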
arXiv:2312.09576 (https://arxiv.org/abs/2312.09576) [pdf, other]
Subjects: eess.IV (Image and Video Processing); cs.CV (Computer Vision and Pattern Recognition)
Title: SegRap2023: A Benchmark of Organs-at-Risk and Gross Tumor Volume Segmentation for Radiotherapy Planning of Nasopharyngeal Carcinoma
Authors: Xiangde Luo, Jia Fu, Yunxin Zhong, Shuolin Liu, Bing Han, Mehdi Astaraki, Simone Bendazzoli, Iuliana Toma-Dasu, Yiwen Ye, Ziyang Chen, Yong Xia, Yanzhou Su, Jin Ye, Junjun He, Zhaohu Xing, Hongqiu Wang, Lei Zhu, Kaixiang Yang, Xin Fang, Zhiwei Wang, Chan Woong Lee, Sang Joon Park, Jaehee Chun, Constantin Ulrich, Klaus H. Maier-Hein, et al. (17 additional authors not shown)
Abstract: Radiation therapy is a primary and effective treatment strategy for NasoPharyngeal Carcinoma (NPC). The precise delineation of Gross Tumor Volumes (GTVs) and Organs-At-Risk (OARs) is crucial in radiation treatment, directly impacting patient prognosis. Previously, the delineation of GTVs and OARs was performed by experienced radiation oncologists. Recently, deep learning has achieved promising results in many medical image segmentation tasks. However, for NPC OAR and GTV segmentation, few public datasets are available for model development and evaluation. To alleviate this problem, the SegRap2023 challenge was organized in conjunction with MICCAI2023 and presented a large-scale benchmark for OAR and GTV segmentation with 400 Computed Tomography (CT) scans from 200 NPC patients, each with a pair of pre-aligned non-contrast and contrast-enhanced CT scans. The challenge's goal was to segment 45 OARs and 2 GTVs from the paired CT scans. In this paper, we detail the challenge and analyze the solutions of all participants. The average Dice similarity coefficient scores for all submissions ranged from 76.68% to 86.70% for OARs and from 70.42% to 73.44% for GTVs. We conclude that the segmentation of large-size OARs is well addressed, and more effort is needed for GTVs and small-size or thin-structure OARs. The benchmark remains publicly available at https://segrap2023.grand-challenge.org
Submitted 15 December, 2023; originally announced December 2023.
Comments: A challenge report of SegRap2023 (organized in conjunction with MICCAI2023)
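The Dice similarity coefficient used to score the submissions above, in its standard binary-volume form (the toy cubes are illustrative):

```python
# Dice similarity coefficient between two binary segmentation volumes.
import numpy as np

def dice(seg, ref, eps=1e-8):
    seg, ref = seg.astype(bool), ref.astype(bool)
    return 2.0 * np.logical_and(seg, ref).sum() / (seg.sum() + ref.sum() + eps)

a = np.zeros((32, 32, 32), dtype=bool); a[8:20, 8:20, 8:20] = True
b = np.zeros_like(a); b[10:22, 8:20, 8:20] = True   # shifted copy
print(f"Dice = {dice(a, b):.4f}")                   # overlapping cubes -> ~0.83
```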
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">A challenge report of SegRap2023 (organized in conjunction with MICCAI2023)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.00568">arXiv:2312.00568</a> <span> [<a href="https://arxiv.org/pdf/2312.00568">pdf</a>, <a href="https://arxiv.org/ps/2312.00568">ps</a>, <a href="https://arxiv.org/format/2312.00568">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> A WINNER+ Based 3-D Non-Stationary Wideband MIMO Channel Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Bian%2C+J">Ji Bian</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+J">Jian Sun</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+C">Cheng-Xiang Wang</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+R">Rui Feng</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+J">Jie Huang</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+Y">Yang Yang</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+M">Minggao Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.00568v1-abstract-short" style="display: inline;"> In this paper, a three-dimensional (3-D) non-stationary wideband multiple-input multiple-output (MIMO) channel model based on the WINNER+ channel model is proposed. The angular distributions of clusters in both the horizontal and vertical planes are jointly considered. The receiver and clusters can be moving, which makes the model more general. Parameters including number of clusters, powers, dela… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.00568v1-abstract-full').style.display = 'inline'; document.getElementById('2312.00568v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.00568v1-abstract-full" style="display: none;"> In this paper, a three-dimensional (3-D) non-stationary wideband multiple-input multiple-output (MIMO) channel model based on the WINNER+ channel model is proposed. The angular distributions of clusters in both the horizontal and vertical planes are jointly considered. The receiver and clusters can be moving, which makes the model more general. Parameters including number of clusters, powers, delays, azimuth angles of departure (AAoDs), azimuth angles of arrival (AAoAs), elevation angles of departure (EAoDs), and elevation angles of arrival (EAoAs) are time-variant. The cluster time evolution is modeled using a birth-death process. Statistical properties, including spatial cross-correlation function (CCF), temporal autocorrelation function (ACF), Doppler power spectrum density (PSD), level-crossing rate (LCR), average fading duration (AFD), and stationary interval are investigated and analyzed. The LCR, AFD, and stationary interval of the proposed channel model are validated against the measurement data. Numerical and simulation results show that the proposed channel model has the ability to reproduce the main properties of real non-stationary channels. 
Furthermore, the proposed channel model can be adapted to various communication scenarios by adjusting different parameter values.

Submitted 1 December, 2023; originally announced December 2023.
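The cluster time evolution via a birth-death process mentioned in the abstract can be illustrated with a short simulation. The sketch below is a minimal, self-contained example and not the paper's implementation: the birth rate, per-cluster death rate, time step, and the exponential delay/power draws are all illustrative assumptions. WINNER-family models additionally couple these rates to scenario-dependent channel fluctuation rates and to the movement of the transmitter, receiver, and clusters.

```python
import numpy as np

rng = np.random.default_rng(0)

# Illustrative parameters (not taken from the paper): cluster birth rate,
# per-cluster death rate, initial cluster count, and time step.
LAMBDA_B = 0.8    # cluster birth rate (new clusters per second), assumed
LAMBDA_D = 0.04   # per-cluster death rate (1/s), assumed
N_INIT = 20       # initial number of clusters
DT = 0.01         # time step (s)
N_STEPS = 1000    # simulated duration = N_STEPS * DT seconds

def evolve_clusters(n_init=N_INIT, n_steps=N_STEPS, dt=DT):
    """Simulate birth-death time evolution of a cluster set.

    Surviving clusters keep their (here: randomly drawn) delays and
    powers; newborn clusters draw fresh ones.  Returns one snapshot of
    (delays, powers) per step, so the cluster count varies over time.
    """
    delays = rng.exponential(100e-9, n_init)  # initial delays (s), assumed
    powers = rng.exponential(1.0, n_init)     # initial linear powers, assumed
    history = []
    for _ in range(n_steps):
        # Death: each cluster independently survives one step with
        # probability exp(-LAMBDA_D * dt).
        survive = rng.random(delays.size) < np.exp(-LAMBDA_D * dt)
        delays, powers = delays[survive], powers[survive]
        # Birth: a Poisson number of new clusters appears in this step.
        n_new = rng.poisson(LAMBDA_B * dt)
        if n_new:
            delays = np.concatenate([delays, rng.exponential(100e-9, n_new)])
            powers = np.concatenate([powers, rng.exponential(1.0, n_new)])
        history.append((delays.copy(), powers.copy()))
    return history

history = evolve_clusters()
counts = [len(d) for d, _ in history]
print(f"clusters: start={counts[0]}, end={counts[-1]}, mean={np.mean(counts):.1f}")
```

For a birth-death process of this form, the stationary mean number of clusters is LAMBDA_B / LAMBDA_D (here 0.8 / 0.04 = 20), so the simulated count fluctuates around its initial value; the time-variant delays, powers, and angles of the surviving and newborn clusters are what make the resulting channel statistics non-stationary.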
<a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>