
Search | arXiv e-print repository

<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 244 results for author: <span class="mathjax">Yang, G</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/eess" aria-role="search"> Searching in archive <strong>eess</strong>. <a href="/search/?searchtype=author&amp;query=Yang%2C+G">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Yang, G"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Yang%2C+G&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Yang, G"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Yang%2C+G&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Yang%2C+G&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Yang%2C+G&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Yang%2C+G&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Yang%2C+G&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Yang%2C+G&amp;start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06667">arXiv:2411.06667</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.06667">pdf</a>, <a href="https://arxiv.org/format/2411.06667">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> DCF-DS: Deep Cascade Fusion of Diarization and Separation for Speech Recognition under Realistic Single-Channel Conditions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Niu%2C+S">Shu-Tong Niu</a>, <a href="/search/eess?searchtype=author&amp;query=Du%2C+J">Jun Du</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+R">Ruo-Yu Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Gao-Bin Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Gao%2C+T">Tian Gao</a>, <a href="/search/eess?searchtype=author&amp;query=Pan%2C+J">Jia Pan</a>, <a href="/search/eess?searchtype=author&amp;query=Hu%2C+Y">Yu Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06667v2-abstract-short" style="display: inline;"> We propose a single-channel Deep Cascade Fusion of Diarization and Separation (DCF-DS) framework for back-end speech recognition, combining neural speaker diarization (NSD) and speech separation (SS). 
First, we sequentially integrate the NSD and SS modules within a joint training framework, enabling the separation module to leverage speaker time boundaries from the diarization module effectively.&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06667v2-abstract-full').style.display = 'inline'; document.getElementById('2411.06667v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06667v2-abstract-full" style="display: none;"> We propose a single-channel Deep Cascade Fusion of Diarization and Separation (DCF-DS) framework for back-end speech recognition, combining neural speaker diarization (NSD) and speech separation (SS). First, we sequentially integrate the NSD and SS modules within a joint training framework, enabling the separation module to leverage speaker time boundaries from the diarization module effectively. Then, to complement DCF-DS training, we introduce a window-level decoding scheme that allows the DCF-DS framework to handle the sparse data convergence instability (SDCI) problem. We also explore using an NSD system trained on real datasets to provide more accurate speaker boundaries during decoding. Additionally, we incorporate an optional multi-input multi-output speech enhancement module (MIMO-SE) within the DCF-DS framework, which offers further performance gains. Finally, we enhance diarization results by re-clustering DCF-DS outputs, improving ASR accuracy. By incorporating the DCF-DS method, we achieved first place in the realistic single-channel track of the CHiME-8 NOTSOFAR-1 challenge. We also perform the evaluation on the open LibriCSS dataset, achieving a new state-of-the-art single-channel speech recognition performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06667v2-abstract-full').style.display = 'none'; document.getElementById('2411.06667v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06437">arXiv:2411.06437</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.06437">pdf</a>, <a href="https://arxiv.org/format/2411.06437">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> CTC-Assisted LLM-Based Contextual ASR </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guanrou Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Ma%2C+Z">Ziyang Ma</a>, <a href="/search/eess?searchtype=author&amp;query=Gao%2C+Z">Zhifu Gao</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+S">Shiliang Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+X">Xie Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06437v1-abstract-short" style="display: inline;"> Contextual ASR or hotword customization holds substantial practical value. Despite the impressive performance of current end-to-end (E2E) automatic speech recognition (ASR) systems, they often face challenges in accurately recognizing rare words. Typical E2E contextual ASR models commonly feature complex architectures and decoding mechanisms, limited in performance and susceptible to interference&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06437v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06437v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06437v1-abstract-full" style="display: none;"> Contextual ASR or hotword customization holds substantial practical value. Despite the impressive performance of current end-to-end (E2E) automatic speech recognition (ASR) systems, they often face challenges in accurately recognizing rare words. Typical E2E contextual ASR models commonly feature complex architectures and decoding mechanisms, limited in performance and susceptible to interference from distractor words. With large language model (LLM)-based ASR models emerging as the new mainstream, we propose a CTC-Assisted LLM-Based Contextual ASR model with an efficient filtering algorithm. By using coarse CTC decoding results to filter potential relevant hotwords and incorporating them into LLM prompt input, our model attains WER/B-WER of 1.27%/3.67% and 2.72%/8.02% on the Librispeech test-clean and test-other sets targeting on recognizing rare long-tail words, demonstrating significant improvements compared to the baseline LLM-based ASR model, and substantially surpassing other related work. More remarkably, with the help of the large language model and proposed filtering algorithm, our contextual ASR model still performs well with 2000 biasing words. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06437v1-abstract-full').style.display = 'none'; document.getElementById('2411.06437v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">SLT 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.03551">arXiv:2411.03551</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.03551">pdf</a>, <a href="https://arxiv.org/format/2411.03551">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Weakly Supervised Semantic Segmentation for Fibrosis via Controllable Image Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Yue%2C+Z">Zhiling Yue</a>, <a href="/search/eess?searchtype=author&amp;query=Fang%2C+Y">Yingying Fang</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+L">Liutao Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Baid%2C+N">Nikhil Baid</a>, <a href="/search/eess?searchtype=author&amp;query=Walsh%2C+S">Simon Walsh</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.03551v1-abstract-short" style="display: inline;"> Fibrotic Lung Disease (FLD) is a severe condition marked by lung stiffening and scarring, leading to respiratory decline. High-resolution computed tomography (HRCT) is critical for diagnosing and monitoring FLD; however, fibrosis appears as irregular, diffuse patterns with unclear boundaries, leading to high inter-observer variability and time-intensive manual annotation. To tackle this challenge,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03551v1-abstract-full').style.display = 'inline'; document.getElementById('2411.03551v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.03551v1-abstract-full" style="display: none;"> Fibrotic Lung Disease (FLD) is a severe condition marked by lung stiffening and scarring, leading to respiratory decline. High-resolution computed tomography (HRCT) is critical for diagnosing and monitoring FLD; however, fibrosis appears as irregular, diffuse patterns with unclear boundaries, leading to high inter-observer variability and time-intensive manual annotation. 

3. arXiv:2411.03551 [pdf, other] eess.IV cs.AI cs.CV
Enhancing Weakly Supervised Semantic Segmentation for Fibrosis via Controllable Image Generation
Authors: Zhiling Yue, Yingying Fang, Liutao Yang, Nikhil Baid, Simon Walsh, Guang Yang
Abstract: Fibrotic Lung Disease (FLD) is a severe condition marked by lung stiffening and scarring, leading to respiratory decline. High-resolution computed tomography (HRCT) is critical for diagnosing and monitoring FLD; however, fibrosis appears as irregular, diffuse patterns with unclear boundaries, leading to high inter-observer variability and time-intensive manual annotation. To tackle this challenge, we propose DiffSeg, a novel weakly supervised semantic segmentation (WSSS) method that uses image-level annotations to generate pixel-level fibrosis segmentation, reducing the need for fine-grained manual labeling. Additionally, DiffSeg incorporates a diffusion-based generative model to synthesize HRCT images with different levels of fibrosis from healthy slices, enabling the generation of fibrosis-injected slices and their paired fibrosis locations. Experiments indicate that our method significantly improves the accuracy of pseudo masks generated by existing WSSS methods, greatly reducing the complexity of manual labeling and enhancing the consistency of the generated masks.
Submitted 5 November, 2024; originally announced November 2024.

4. arXiv:2410.16726 [pdf, other] eess.AS cs.AI cs.CL
Enhancing Low-Resource ASR through Versatile TTS: Bridging the Data Gap
Authors: Guanrou Yang, Fan Yu, Ziyang Ma, Zhihao Du, Zhifu Gao, Shiliang Zhang, Xie Chen
Abstract: While automatic speech recognition (ASR) systems have achieved remarkable performance with large-scale datasets, their efficacy remains inadequate in low-resource settings, encompassing dialects, accents, minority languages, and long-tail hotwords, domains with significant practical relevance. With the advent of versatile and powerful text-to-speech (TTS) models, capable of generating speech with human-level naturalness, expressiveness, and diverse speaker profiles, leveraging TTS for ASR data augmentation provides a cost-effective and practical approach to enhancing ASR performance. Comprehensive experiments on an unprecedentedly rich variety of low-resource datasets demonstrate consistent and substantial performance improvements, proving that the proposed method of enhancing low-resource ASR through a versatile TTS model is highly effective and has broad application prospects. Furthermore, we delve deeper into key characteristics of synthesized speech data that contribute to ASR improvement, examining factors such as text diversity, speaker diversity, and the volume of synthesized data, with text diversity being studied for the first time in this work. We hope our findings provide helpful guidance and reference for the practical application of TTS-based data augmentation and push the advancement of low-resource ASR one step further.
Submitted 22 October, 2024; originally announced October 2024.

5. arXiv:2410.13896 [pdf, other] eess.IV cs.CV
From Real Artifacts to Virtual Reference: A Robust Framework for Translating Endoscopic Images
Authors: Junyang Wu, Fangfang Xie, Jiayuan Sun, Yun Gu, Guang-Zhong Yang
Abstract: Domain adaptation, which bridges the distributions across different modalities, plays a crucial role in multimodal medical image analysis. In endoscopic imaging, combining pre-operative data with intra-operative imaging is important for surgical planning and navigation. However, existing domain adaptation methods are hampered by distribution shift caused by in vivo artifacts, necessitating robust techniques for aligning noisy and artifact-abundant patient endoscopic videos with clean virtual images reconstructed from pre-operative tomographic data for pose estimation during intraoperative guidance. This paper presents an artifact-resilient image translation method and an associated benchmark for this purpose. The method incorporates a novel "local-global" translation framework and a noise-resilient feature extraction strategy. For the former, it decouples the image translation process into a local step for feature denoising and a global step for global style transfer. For feature extraction, a new contrastive learning strategy is proposed, which can extract noise-resilient features for establishing robust correspondence across domains. Detailed validation on both public and in-house clinical datasets has been conducted, demonstrating significantly improved performance compared to the current state-of-the-art.
Submitted 23 October, 2024; v1 submitted 14 October, 2024; originally announced October 2024.

6. arXiv:2410.10551 [pdf, other] eess.IV cs.CV
Preserving Cardiac Integrity: A Topology-Infused Approach to Whole Heart Segmentation
Authors: Chenyu Zhang, Wenxue Guan, Xiaodan Xing, Guang Yang
Abstract: Whole heart segmentation (WHS) supports cardiovascular disease (CVD) diagnosis, disease monitoring, treatment planning, and prognosis. Deep learning has become the most widely used method for WHS applications in recent years. However, segmentation of whole-heart structures faces numerous challenges including heart shape variability during the cardiac cycle, clinical artifacts like motion and poor contrast-to-noise ratio, domain shifts in multi-center data, and the distinct modalities of CT and MRI. To address these limitations and improve segmentation quality, this paper introduces a new topology-preserving module that is integrated into deep neural networks. The implementation achieves anatomically plausible segmentation by using learned topology-preserving fields, which are based entirely on 3D convolution and are therefore very effective for 3D voxel data. We incorporate natural constraints between structures into the end-to-end training and enrich the feature representation of the neural network. The effectiveness of the proposed method is validated on an open-source medical heart dataset, specifically using the WHS++ data. The results demonstrate that the architecture performs exceptionally well, achieving a Dice coefficient of 0.939 during testing. This indicates full topology preservation for individual structures and significantly outperforms other baselines in preserving the overall scene topology.
Submitted 17 October, 2024; v1 submitted 14 October, 2024; originally announced October 2024.
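
For reference, the Dice coefficient reported above is the standard volume-overlap metric, Dice = 2|A ∩ B| / (|A| + |B|). A minimal NumPy version for binary 3D voxel masks (generic evaluation code, not the paper's):

```python
import numpy as np

def dice_coefficient(pred: np.ndarray, target: np.ndarray) -> float:
    """Dice = 2|A ∩ B| / (|A| + |B|) for binary voxel masks."""
    pred = pred.astype(bool)
    target = target.astype(bool)
    intersection = np.logical_and(pred, target).sum()
    denom = pred.sum() + target.sum()
    return 2.0 * intersection / denom if denom > 0 else 1.0

# Toy check on two half-overlapping 3D masks.
a = np.zeros((4, 4, 4), dtype=bool); a[:2] = True
b = np.zeros((4, 4, 4), dtype=bool); b[1:3] = True
print(dice_coefficient(a, b))  # 0.5
```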

7. arXiv:2409.17705 [pdf, other] eess.SY
On the Output Redundancy of LTI Systems: A Geometric Approach with Application to Privacy
Authors: Guitao Yang, Alexander J. Gallo, Angelo Barboni, Riccardo M. G. Ferrari, Andrea Serrani, Thomas Parisini
Abstract: This paper examines the properties of output-redundant systems, that is, systems possessing a larger number of outputs than inputs, through the lens of the geometric approach of Wonham et al. We begin by formulating a simple output allocation synthesis problem, which involves "concealing" input information from a malicious eavesdropper having access to the system output, while still allowing a legitimate user to reconstruct it. It is shown that the solvability of this problem requires the availability of a redundant set of outputs. This very problem is instrumental in unveiling the fundamental geometric properties of output-redundant systems, which form the basis for our subsequent constructions and results. As a direct application, we demonstrate how output allocation can be employed to effectively protect the input information from certain output eavesdroppers with guaranteed results.
Submitted 26 September, 2024; originally announced September 2024.

8. arXiv:2409.16803 [pdf, other] eess.AS cs.SD
Incorporating Spatial Cues in Modular Speaker Diarization for Multi-channel Multi-party Meetings
Authors: Ruoyu Wang, Shutong Niu, Gaobin Yang, Jun Du, Shuangqing Qian, Tian Gao, Jia Pan
Abstract: Although fully end-to-end speaker diarization systems have made significant progress in recent years, modular systems often achieve superior results in real-world scenarios due to their greater adaptability and robustness. Historically, modular speaker diarization methods have seldom discussed how to leverage spatial cues from multi-channel speech. This paper proposes a three-stage modular system that enhances single-channel neural speaker diarization and recognition performance by utilizing spatial cues from multi-channel speech to provide more accurate initialization for each stage of neural speaker diarization (NSD) decoding: (1) overlap detection and continuous speech separation (CSS) are applied to the multi-channel speech to obtain cleaner single-speaker segments for clustering, followed by the first NSD decoding pass; (2) the first-pass results initialize a complex Angular Central Gaussian Mixture Model (cACGMM) to estimate speaker-wise masks on the multi-channel speech, and overlap-add and mask-to-VAD steps yield an initialization with lower speaker error (SpkErr), followed by the second NSD decoding pass; (3) the second-pass results are used for guided source separation (GSS), and short segments containing less than one word are recognized and filtered out to obtain cleaner speech segments, followed by re-clustering and the final NSD decoding pass. We present the progressively explored evaluation results from the CHiME-8 NOTSOFAR-1 (Natural Office Talkers in Settings Of Far-field Audio Recordings) challenge, demonstrating the effectiveness of our system and its contribution to improved recognition performance. Our final system achieved first place in the challenge.
Submitted 25 September, 2024; originally announced September 2024.
Comments: 5 pages, Submitted to ICASSP 2025
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, Submitted to ICASSP 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.03087">arXiv:2409.03087</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.03087">pdf</a>, <a href="https://arxiv.org/format/2409.03087">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Coupling AI and Citizen Science in Creation of Enhanced Training Dataset for Medical Image Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Syahmi%2C+A">Amir Syahmi</a>, <a href="/search/eess?searchtype=author&amp;query=Lu%2C+X">Xiangrong Lu</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+Y">Yinxuan Li</a>, <a href="/search/eess?searchtype=author&amp;query=Yao%2C+H">Haoxuan Yao</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+H">Hanjun Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Acharya%2C+I">Ishita Acharya</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+S">Shiyi Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Nan%2C+Y">Yang Nan</a>, <a href="/search/eess?searchtype=author&amp;query=Xing%2C+X">Xiaodan Xing</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.03087v1-abstract-short" style="display: inline;"> Recent advancements in medical imaging and artificial intelligence (AI) have greatly enhanced diagnostic capabilities, but the development of effective deep learning (DL) models is still constrained by the lack of high-quality annotated datasets. The traditional manual annotation process by medical experts is time- and resource-intensive, limiting the scalability of these datasets. In this work, w&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.03087v1-abstract-full').style.display = 'inline'; document.getElementById('2409.03087v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.03087v1-abstract-full" style="display: none;"> Recent advancements in medical imaging and artificial intelligence (AI) have greatly enhanced diagnostic capabilities, but the development of effective deep learning (DL) models is still constrained by the lack of high-quality annotated datasets. The traditional manual annotation process by medical experts is time- and resource-intensive, limiting the scalability of these datasets. In this work, we introduce a robust and versatile framework that combines AI and crowdsourcing to improve both the quality and quantity of medical image datasets across different modalities. Our approach utilises a user-friendly online platform that enables a diverse group of crowd annotators to label medical images efficiently. 
By integrating the MedSAM segmentation AI with this platform, we accelerate the annotation process while maintaining expert-level quality through an algorithm that merges crowd-labelled images. Additionally, we employ pix2pixGAN, a generative AI model, to expand the training dataset with synthetic images that capture realistic morphological features. These methods are combined into a cohesive framework designed to produce an enhanced dataset, which can serve as a universal pre-processing pipeline to boost the training of any medical deep learning segmentation model. Our results demonstrate that this framework significantly improves model performance, especially when training data is limited. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.03087v1-abstract-full').style.display = 'none'; document.getElementById('2409.03087v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.02070">arXiv:2409.02070</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.02070">pdf</a>, <a href="https://arxiv.org/format/2409.02070">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Explicit Differentiable Slicing and Global Deformation for Cardiac Mesh Reconstruction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Luo%2C+Y">Yihao Luo</a>, <a href="/search/eess?searchtype=author&amp;query=Sesia%2C+D">Dario Sesia</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+F">Fanwen Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+Y">Yinzhe Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+W">Wenhao Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+J">Jiahao Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+F">Fadong Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Shah%2C+A">Anoop Shah</a>, <a href="/search/eess?searchtype=author&amp;query=Kaural%2C+A">Amit Kaural</a>, <a href="/search/eess?searchtype=author&amp;query=Mayet%2C+J">Jamil Mayet</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guang Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Yap%2C+C">ChoonHwai Yap</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.02070v2-abstract-short" style="display: inline;"> Mesh reconstruction of the cardiac anatomy from medical images is useful for shape and motion measurements and biophysics simulations to facilitate the assessment of cardiac function and health. However, 3D medical images are often acquired as 2D slices that are sparsely sampled and noisy, and mesh reconstruction on such data is a challenging task. 
Traditional voxel-based approaches rely on pre- a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.02070v2-abstract-full').style.display = 'inline'; document.getElementById('2409.02070v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.02070v2-abstract-full" style="display: none;"> Mesh reconstruction of the cardiac anatomy from medical images is useful for shape and motion measurements and biophysics simulations to facilitate the assessment of cardiac function and health. However, 3D medical images are often acquired as 2D slices that are sparsely sampled and noisy, and mesh reconstruction on such data is a challenging task. Traditional voxel-based approaches rely on pre- and post-processing that compromises image fidelity, while mesh-level deep learning approaches require mesh annotations that are difficult to get. Therefore, direct cross-domain supervision from 2D images to meshes is a key technique for advancing 3D learning in medical imaging, but it has not been well-developed. While there have been attempts to approximate the optimized meshes&#39; slicing, few existing methods directly use 2D slices to supervise mesh reconstruction in a differentiable manner. Here, we propose a novel explicit differentiable voxelization and slicing (DVS) algorithm that allows gradient backpropagation to a mesh from its slices, facilitating refined mesh optimization directly supervised by the losses defined on 2D images. Further, we propose an innovative framework for extracting patient-specific left ventricle (LV) meshes from medical images by coupling DVS with a graph harmonic deformation (GHD) mesh morphing descriptor of cardiac shape that naturally preserves mesh quality and smoothness during optimization. Experimental results demonstrate that our method achieves state-of-the-art performance in cardiac mesh reconstruction tasks from CT and MRI, with an overall Dice score of 90% on multi-datasets, outperforming existing approaches. The proposed method can further quantify clinically useful parameters such as ejection fraction and global myocardial strains, closely matching the ground truth and surpassing the traditional voxel-based approach in sparse images. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.02070v2-abstract-full').style.display = 'none'; document.getElementById('2409.02070v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.02041">arXiv:2409.02041</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.02041">pdf</a>, <a href="https://arxiv.org/format/2409.02041">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> The USTC-NERCSLIP Systems for the CHiME-8 NOTSOFAR-1 Challenge </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Niu%2C+S">Shutong Niu</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+R">Ruoyu Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Du%2C+J">Jun Du</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Gaobin Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Tu%2C+Y">Yanhui Tu</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+S">Siyuan Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Qian%2C+S">Shuangqing Qian</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+H">Huaxin Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+H">Haitao Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+X">Xueyang Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhong%2C+G">Guolong Zhong</a>, <a href="/search/eess?searchtype=author&amp;query=Yu%2C+X">Xindi Yu</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+J">Jieru Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+M">Mengzhi Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Cai%2C+D">Di Cai</a>, <a href="/search/eess?searchtype=author&amp;query=Gao%2C+T">Tian Gao</a>, <a href="/search/eess?searchtype=author&amp;query=Wan%2C+G">Genshun Wan</a>, <a href="/search/eess?searchtype=author&amp;query=Ma%2C+F">Feng Ma</a>, <a href="/search/eess?searchtype=author&amp;query=Pan%2C+J">Jia Pan</a>, <a href="/search/eess?searchtype=author&amp;query=Gao%2C+J">Jianqing Gao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.02041v2-abstract-short" style="display: inline;"> This technical report outlines our submission system for the CHiME-8 NOTSOFAR-1 Challenge. The primary difficulty of this challenge is the dataset recorded across various conference rooms, which captures real-world complexities such as high overlap rates, background noises, a variable number of speakers, and natural conversation styles. To address these issues, we optimized the system in several a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.02041v2-abstract-full').style.display = 'inline'; document.getElementById('2409.02041v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.02041v2-abstract-full" style="display: none;"> This technical report outlines our submission system for the CHiME-8 NOTSOFAR-1 Challenge. 
The primary difficulty of this challenge is the dataset recorded across various conference rooms, which captures real-world complexities such as high overlap rates, background noises, a variable number of speakers, and natural conversation styles. To address these issues, we optimized the system in several aspects: For front-end speech signal processing, we introduced a data-driven joint training method for diarization and separation (JDS) to enhance audio quality. Additionally, we integrated traditional guided source separation (GSS) for the multi-channel track to provide complementary information for the JDS. For back-end speech recognition, we enhanced Whisper with WavLM, ConvNeXt, and Transformer innovations, applying multi-task training and Noise KLD augmentation, to significantly advance ASR robustness and accuracy. Our system attained a Time-Constrained minimum Permutation Word Error Rate (tcpWER) of 14.265% and 22.989% on the CHiME-8 NOTSOFAR-1 Dev-set-2 multi-channel and single-channel tracks, respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.02041v2-abstract-full').style.display = 'none'; document.getElementById('2409.02041v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.01544">arXiv:2409.01544</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.01544">pdf</a>, <a href="https://arxiv.org/format/2409.01544">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Learning Task-Specific Sampling Strategy for Sparse-View CT Reconstruction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Yang%2C+L">Liutao Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+J">Jiahao Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Fang%2C+Y">Yingying Fang</a>, <a href="/search/eess?searchtype=author&amp;query=Aviles-Rivero%2C+A+I">Angelica I Aviles-Rivero</a>, <a href="/search/eess?searchtype=author&amp;query=Schonlieb%2C+C">Carola-Bibiane Schonlieb</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+D">Daoqiang Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.01544v1-abstract-short" style="display: inline;"> Sparse-View Computed Tomography (SVCT) offers low-dose and fast imaging but suffers from severe artifacts. Optimizing the sampling strategy is an essential approach to improving the imaging quality of SVCT.
However, current methods typically optimize a universal sampling strategy for all types of scans, overlooking the fact that the optimal strategy may vary depending on the specific scanning task&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01544v1-abstract-full').style.display = 'inline'; document.getElementById('2409.01544v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.01544v1-abstract-full" style="display: none;"> Sparse-View Computed Tomography (SVCT) offers low-dose and fast imaging but suffers from severe artifacts. Optimizing the sampling strategy is an essential approach to improving the imaging quality of SVCT. However, current methods typically optimize a universal sampling strategy for all types of scans, overlooking the fact that the optimal strategy may vary depending on the specific scanning task, whether it involves particular body scans (e.g., chest CT scans) or downstream clinical applications (e.g., disease diagnosis). The optimal strategy for one scanning task may not perform as well when applied to other tasks. To address this problem, we propose a deep learning framework that learns task-specific sampling strategies with a multi-task approach to train a unified reconstruction network while tailoring optimal sampling strategies for each individual task. Thus, a task-specific sampling strategy can be applied to each type of scan to improve the quality of SVCT imaging and further support the performance of downstream clinical usage. Extensive experiments across different scanning types validate the effectiveness of task-specific sampling strategies in enhancing imaging quality. Experiments involving downstream tasks verify the clinical value of learned sampling strategies, as evidenced by notable improvements in downstream task performance. Furthermore, the utilization of a multi-task framework with a shared reconstruction network facilitates deployment on current imaging devices with switchable task-specific modules, and allows new tasks to be integrated easily without retraining the entire model. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01544v1-abstract-full').style.display = 'none'; document.getElementById('2409.01544v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024.
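One common way to realize learnable, task-specific view sampling of the kind described above is to give each task its own logits over projection angles and relax the binary view-selection mask so it stays differentiable. The sketch below is a hedged illustration under that assumption; the module names, the sigmoid relaxation, and the stand-in reconstruction network are hypothetical, not the paper's implementation.

```python
# Minimal sketch of per-task view-sampling masks with a shared reconstruction
# network (illustrative assumptions throughout; not the paper's code).
import torch
import torch.nn as nn

N_ANGLES, N_TASKS = 180, 3

class TaskSampler(nn.Module):
    def __init__(self):
        super().__init__()
        # One row of logits over projection angles per task.
        self.logits = nn.Parameter(torch.zeros(N_TASKS, N_ANGLES))

    def forward(self, task_id, temp=0.5):
        # Relaxed (differentiable) binary mask over projection angles.
        return torch.sigmoid(self.logits[task_id] / temp)

shared_recon = nn.Sequential(                 # stand-in for a real recon net
    nn.Conv2d(1, 16, 3, padding=1), nn.ReLU(), nn.Conv2d(16, 1, 3, padding=1))
sampler = TaskSampler()
opt = torch.optim.Adam(
    list(shared_recon.parameters()) + list(sampler.parameters()), lr=1e-3)

sino = torch.randn(4, 1, N_ANGLES, 128)       # toy sinograms
target = torch.randn(4, 1, N_ANGLES, 128)     # toy reconstruction targets
for task_id in range(N_TASKS):                # multi-task training loop
    mask = sampler(task_id)                   # (N_ANGLES,)
    masked = sino * mask[None, None, :, None] # zero out unselected views
    recon = shared_recon(masked)
    # Reconstruction loss plus a sparsity penalty acting as a view budget.
    loss = (recon - target).pow(2).mean() + 1e-3 * mask.mean()
    opt.zero_grad(); loss.backward(); opt.step()

print(sampler(0).topk(10).indices)            # most-selected angles for task 0
```

The design point is that only the small per-task logit vectors are task-specific, while the reconstruction network is shared, which mirrors the switchable-module deployment story in the abstract.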
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.00078">arXiv:2409.00078</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.00078">pdf</a>, <a href="https://arxiv.org/format/2409.00078">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> SGP-RI: A Real-Time-Trainable and Decentralized IoT Indoor Localization Model Based on Sparse Gaussian Process with Reduced-Dimensional Inputs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Tang%2C+Z">Zhe Tang</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+S">Sihao Li</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+Z">Zichen Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guandong Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Kim%2C+K+S">Kyeong Soo Kim</a>, <a href="/search/eess?searchtype=author&amp;query=Smith%2C+J+S">Jeremy S. Smith</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.00078v1-abstract-short" style="display: inline;"> Internet of Things (IoT) devices are deployed in the filed, there is an enormous amount of untapped potential in local computing on those IoT devices. Harnessing this potential for indoor localization, therefore, becomes an exciting research area. Conventionally, the training and deployment of indoor localization models are based on centralized servers with substantial computational resources. Thi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.00078v1-abstract-full').style.display = 'inline'; document.getElementById('2409.00078v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.00078v1-abstract-full" style="display: none;"> Internet of Things (IoT) devices are deployed in the filed, there is an enormous amount of untapped potential in local computing on those IoT devices. Harnessing this potential for indoor localization, therefore, becomes an exciting research area. Conventionally, the training and deployment of indoor localization models are based on centralized servers with substantial computational resources. This centralized approach faces several challenges, including the database&#39;s inability to accommodate the dynamic and unpredictable nature of the indoor electromagnetic environment, the model retraining costs, and the susceptibility of centralized servers to security breaches. To mitigate these challenges we aim to amalgamate the offline and online phases of traditional indoor localization methods using a real-time-trainable and decentralized IoT indoor localization model based on Sparse Gaussian Process with Reduced-dimensional Inputs (SGP-RI), where the number and dimension of the input data are reduced through reference point and wireless access point filtering, respectively. 
The experimental results, based on a multi-building and multi-floor static database as well as a single-building and single-floor dynamic database, demonstrate that the proposed SGP-RI model with less than half the training samples as inducing inputs can produce localization performance comparable to that of the standard Gaussian Process model using the whole set of training samples. The SGP-RI model enables the decentralization of indoor localization, facilitating its deployment to resource-constrained IoT devices, and thereby could provide enhanced security and privacy, reduced costs, and reduced network dependency. Also, the model&#39;s capability of real-time training makes it possible to quickly adapt to the time-varying indoor electromagnetic environment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.00078v1-abstract-full').style.display = 'none'; document.getElementById('2409.00078v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 4 figures, under review for journal publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.05249">arXiv:2408.05249</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.05249">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Advancing oncology with federated learning: transcending boundaries in breast, lung, and prostate cancer.
A systematic review </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Ankolekar%2C+A">Anshu Ankolekar</a>, <a href="/search/eess?searchtype=author&amp;query=Boie%2C+S">Sebastian Boie</a>, <a href="/search/eess?searchtype=author&amp;query=Abdollahyan%2C+M">Maryam Abdollahyan</a>, <a href="/search/eess?searchtype=author&amp;query=Gadaleta%2C+E">Emanuela Gadaleta</a>, <a href="/search/eess?searchtype=author&amp;query=Hasheminasab%2C+S+A">Seyed Alireza Hasheminasab</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guang Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Beauville%2C+C">Charles Beauville</a>, <a href="/search/eess?searchtype=author&amp;query=Dikaios%2C+N">Nikolaos Dikaios</a>, <a href="/search/eess?searchtype=author&amp;query=Kastis%2C+G+A">George Anthony Kastis</a>, <a href="/search/eess?searchtype=author&amp;query=Bussmann%2C+M">Michael Bussmann</a>, <a href="/search/eess?searchtype=author&amp;query=Khalid%2C+S">Sara Khalid</a>, <a href="/search/eess?searchtype=author&amp;query=Kruger%2C+H">Hagen Kruger</a>, <a href="/search/eess?searchtype=author&amp;query=Lambin%2C+P">Philippe Lambin</a>, <a href="/search/eess?searchtype=author&amp;query=Papanastasiou%2C+G">Giorgos Papanastasiou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.05249v1-abstract-short" style="display: inline;"> Federated Learning (FL) has emerged as a promising solution to address the limitations of centralised machine learning (ML) in oncology, particularly in overcoming privacy concerns and harnessing the power of diverse, multi-center data. This systematic review synthesises current knowledge on the state-of-the-art FL in oncology, focusing on breast, lung, and prostate cancer. Distinct from previous&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.05249v1-abstract-full').style.display = 'inline'; document.getElementById('2408.05249v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.05249v1-abstract-full" style="display: none;"> Federated Learning (FL) has emerged as a promising solution to address the limitations of centralised machine learning (ML) in oncology, particularly in overcoming privacy concerns and harnessing the power of diverse, multi-center data. This systematic review synthesises current knowledge on the state-of-the-art FL in oncology, focusing on breast, lung, and prostate cancer. Distinct from previous surveys, our comprehensive review critically evaluates the real-world implementation and impact of FL on cancer care, demonstrating its effectiveness in enhancing ML generalisability, performance and data privacy in clinical settings and data. We evaluated state-of-the-art advances in FL, demonstrating its growing adoption amid tightening data privacy regulations. FL outperformed centralised ML in 15 out of the 25 studies reviewed, spanning diverse ML models and clinical applications, and facilitating integration of multi-modal information for precision medicine. Despite the current challenges identified in reproducibility, standardisation and methodology across studies, the demonstrable benefits of FL in harnessing real-world data and addressing clinical needs highlight its significant potential for advancing cancer research. 
We propose that future research should focus on addressing these limitations and investigating further advanced FL methods, to fully harness data diversity and realise the transformative power of cutting-edge FL in cancer care. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.05249v1-abstract-full').style.display = 'none'; document.getElementById('2408.05249v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 Figures, 3 Tables, 1 Supplementary Table</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.00940">arXiv:2408.00940</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.00940">pdf</a>, <a href="https://arxiv.org/format/2408.00940">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> A dual-task mutual learning framework for predicting post-thrombectomy cerebral hemorrhage </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+C">Caiwen Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+T">Tianyu Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Xing%2C+X">Xiaodan Xing</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+M">Mianxin Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guang Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Ding%2C+Z">Zhongxiang Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Shen%2C+D">Dinggang Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.00940v1-abstract-short" style="display: inline;"> Ischemic stroke is a severe condition caused by the blockage of brain blood vessels, and can lead to the death of brain tissue due to oxygen deprivation. Thrombectomy has become a common treatment choice for ischemic stroke due to its immediate effectiveness. However, it carries the risk of postoperative cerebral hemorrhage. Clinically, multiple CT scans within 0-72 hours post-surgery are used to moni&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00940v1-abstract-full').style.display = 'inline'; document.getElementById('2408.00940v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.00940v1-abstract-full" style="display: none;"> Ischemic stroke is a severe condition caused by the blockage of brain blood vessels, and can lead to the death of brain tissue due to oxygen deprivation. Thrombectomy has become a common treatment choice for ischemic stroke due to its immediate effectiveness. However, it carries the risk of postoperative cerebral hemorrhage.
Clinically, multiple CT scans within 0-72 hours post-surgery are used to monitor for hemorrhage. However, this approach exposes patients to additional radiation dose and may delay the detection of cerebral hemorrhage. To address this dilemma, we propose a novel prediction framework for measuring postoperative cerebral hemorrhage using only the patient&#39;s initial CT scan. Specifically, we introduce a dual-task mutual learning framework that takes the initial CT scan as input and simultaneously estimates both the follow-up CT scan and the prognostic label to predict the occurrence of postoperative cerebral hemorrhage. Our proposed framework incorporates two attention mechanisms, i.e., self-attention and interactive attention. The self-attention mechanism allows the model to focus more on high-density areas in the image, which are critical for diagnosis (i.e., potential hemorrhage areas). The interactive attention mechanism further models the dependencies between the interrelated generation and classification tasks, enabling both tasks to perform better than when conducted individually. Validated on clinical data, our method can generate follow-up CT scans better than state-of-the-art methods, and achieves an accuracy of 86.37% in predicting follow-up prognostic labels. Our work thus contributes to the timely screening of post-thrombectomy cerebral hemorrhage, and could significantly reform the clinical process of thrombectomy and other similar operations related to stroke. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00940v1-abstract-full').style.display = 'none'; document.getElementById('2408.00940v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024.
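A generic way to wire up the dual-task design sketched in this abstract is a shared encoder feeding a generation head and a classification head, with a cross-attention step through which one task's features attend to the other's. The PyTorch sketch below is illustrative only; the layer names and sizes are assumptions, not the authors' architecture.

```python
# Hedged sketch of a dual-task network with "interactive" cross-attention
# between the generation and classification branches (generic illustration).
import torch
import torch.nn as nn

class DualTaskNet(nn.Module):
    def __init__(self, dim=64):
        super().__init__()
        self.encoder = nn.Sequential(nn.Conv2d(1, dim, 3, padding=1), nn.ReLU())
        self.gen_proj = nn.Conv2d(dim, dim, 1)
        self.cls_proj = nn.Conv2d(dim, dim, 1)
        # Interactive attention between the two task-specific feature maps.
        self.cross_attn = nn.MultiheadAttention(dim, num_heads=4, batch_first=True)
        self.gen_head = nn.Conv2d(dim, 1, 1)           # follow-up scan prediction
        self.cls_head = nn.Linear(dim, 2)              # prognostic label

    def forward(self, x):
        f = self.encoder(x)                            # (B, C, H, W)
        g, c = self.gen_proj(f), self.cls_proj(f)
        g_seq = g.flatten(2).transpose(1, 2)           # (B, HW, C)
        c_seq = c.flatten(2).transpose(1, 2)
        c_att, _ = self.cross_attn(c_seq, g_seq, g_seq)  # cls attends to gen
        gen = self.gen_head(g)                         # predicted follow-up scan
        logits = self.cls_head(c_att.mean(dim=1))      # hemorrhage yes/no
        return gen, logits

net = DualTaskNet()
gen, logits = net(torch.randn(2, 1, 32, 32))           # toy initial CT slices
print(gen.shape, logits.shape)                         # (2,1,32,32), (2,2)
```

Training such a model jointly on a generation loss and a classification loss is what lets the two interrelated tasks inform each other, as the abstract argues.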
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.00938">arXiv:2408.00938</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.00938">pdf</a>, <a href="https://arxiv.org/format/2408.00938">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> CIResDiff: A Clinically-Informed Residual Diffusion Model for Predicting Idiopathic Pulmonary Fibrosis Progression </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+C">Caiwen Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Xing%2C+X">Xiaodan Xing</a>, <a href="/search/eess?searchtype=author&amp;query=Ou%2C+Z">Zaixin Ou</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+M">Mianxin Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Simon%2C+W">Walsh Simon</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guang Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Shen%2C+D">Dinggang Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.00938v2-abstract-short" style="display: inline;"> The progression of Idiopathic Pulmonary Fibrosis (IPF) significantly correlates with higher patient mortality rates. Early detection of IPF progression is critical for initiating timely treatment, which can effectively slow down the advancement of the disease. However, the current clinical criteria define disease progression as requiring two CT scans with a one-year interval, presenting a dilemma: a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00938v2-abstract-full').style.display = 'inline'; document.getElementById('2408.00938v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.00938v2-abstract-full" style="display: none;"> The progression of Idiopathic Pulmonary Fibrosis (IPF) significantly correlates with higher patient mortality rates. Early detection of IPF progression is critical for initiating timely treatment, which can effectively slow down the advancement of the disease. However, the current clinical criteria define disease progression as requiring two CT scans with a one-year interval, presenting a dilemma: a disease progression is identified only after the disease has already progressed. To this end, in this paper, we develop a novel diffusion model to accurately predict the progression of IPF by generating the patient&#39;s follow-up CT scan from the initial CT scan. Specifically, drawing on clinical prior knowledge, we tailor improvements to the traditional diffusion model and propose a Clinically-Informed Residual Diffusion model, called CIResDiff.
The key innovations of CIResDiff include 1) performing the target region pre-registration to align the lung regions of two CT scans at different time points to reduce the generation difficulty, 2) adopting the residual diffusion instead of traditional diffusion to enable the model to focus more on differences (i.e., lesions) between the two CT scans rather than the largely identical anatomical content, and 3) designing the clinically-informed process based on CLIP technology to integrate lung function information, which is highly relevant to diagnosis, into the reverse process to assist generation. Extensive experiments on clinical data demonstrate that our approach can outperform state-of-the-art methods and effectively predict the progression of IPF. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00938v2-abstract-full').style.display = 'none'; document.getElementById('2408.00938v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.17882">arXiv:2407.17882</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.17882">pdf</a>, <a href="https://arxiv.org/format/2407.17882">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Artificial Immunofluorescence in a Flash: Rapid Synthetic Imaging from Brightfield Through Residual Diffusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xing%2C+X">Xiaodan Xing</a>, <a href="/search/eess?searchtype=author&amp;query=Tang%2C+C">Chunling Tang</a>, <a href="/search/eess?searchtype=author&amp;query=Murdoch%2C+S">Siofra Murdoch</a>, <a href="/search/eess?searchtype=author&amp;query=Papanastasiou%2C+G">Giorgos Papanastasiou</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+Y">Yunzhe Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+X">Xianglu Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Cross-Zamirski%2C+J">Jan Cross-Zamirski</a>, <a href="/search/eess?searchtype=author&amp;query=Sch%C3%B6nlieb%2C+C">Carola-Bibiane Schönlieb</a>, <a href="/search/eess?searchtype=author&amp;query=Liang%2C+K+X">Kristina Xiao Liang</a>, <a href="/search/eess?searchtype=author&amp;query=Niu%2C+Z">Zhangming Niu</a>, <a href="/search/eess?searchtype=author&amp;query=Fang%2C+E+F">Evandro Fei Fang</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Y">Yinhai Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.17882v1-abstract-short" style="display: inline;"> Immunofluorescent (IF) imaging is crucial for visualizing biomarker expressions, cell morphology and assessing the effects of drug treatments on sub-cellular components.
IF imaging needs an extra staining process and often requires cell fixation; therefore it may also introduce artefacts and alter endogenous cell morphology. Some IF stains are expensive or not readily available, hence hindering exp&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.17882v1-abstract-full').style.display = 'inline'; document.getElementById('2407.17882v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.17882v1-abstract-full" style="display: none;"> Immunofluorescent (IF) imaging is crucial for visualizing biomarker expressions, cell morphology and assessing the effects of drug treatments on sub-cellular components. IF imaging needs an extra staining process and often requires cell fixation; therefore it may also introduce artefacts and alter endogenous cell morphology. Some IF stains are expensive or not readily available, hence hindering experiments. Recent diffusion models, which synthesise high-fidelity IF images from easy-to-acquire brightfield (BF) images, offer a promising solution but are hindered by training instability and slow inference times due to the noise diffusion process. This paper presents a novel method for the conditional synthesis of IF images directly from BF images along with cell segmentation masks. Our approach employs a Residual Diffusion process that enhances stability and significantly reduces inference time. We performed a critical evaluation against other image-to-image synthesis models, including UNets, GANs, and advanced diffusion models. Our model demonstrates significant improvements in image quality (p&lt;0.05 in MSE, PSNR, and SSIM), inference speed (26 times faster than competing diffusion models), and accurate segmentation results for both nuclei and cell bodies (0.77 and 0.63 mean IOU for nuclei and cell true positives, respectively). This paper is a substantial advancement in the field, providing robust and efficient tools for cell image analysis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.17882v1-abstract-full').style.display = 'none'; document.getElementById('2407.17882v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024.
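The residual-diffusion idea referenced in the last two abstracts can be illustrated compactly: rather than denoising the target image from pure noise, the model diffuses and denoises the residual between the IF target and the BF input, so only the difference has to be synthesized. Below is a minimal training-step sketch, assuming a toy noise predictor and a shortened noise schedule; it is not either paper's model.

```python
# Minimal sketch of residual diffusion for BF-to-IF synthesis
# (illustrative assumptions throughout; not the published implementation).
import torch
import torch.nn as nn

T = 100                                          # shortened diffusion chain
betas = torch.linspace(1e-4, 0.02, T)
alphas_bar = torch.cumprod(1.0 - betas, dim=0)

eps_net = nn.Sequential(                         # stand-in noise predictor
    nn.Conv2d(2, 32, 3, padding=1), nn.ReLU(), nn.Conv2d(32, 1, 3, padding=1))

def train_step(bf, if_img):
    residual = if_img - bf                       # diffusion target is the residual
    t = torch.randint(0, T, (bf.shape[0],))
    a = alphas_bar[t].view(-1, 1, 1, 1)
    noise = torch.randn_like(residual)
    noisy_res = a.sqrt() * residual + (1 - a).sqrt() * noise
    pred = eps_net(torch.cat([noisy_res, bf], dim=1))   # condition on the BF image
    return (pred - noise).pow(2).mean()          # standard epsilon-prediction loss

bf = torch.randn(4, 1, 32, 32)                   # toy brightfield batch
if_img = torch.randn(4, 1, 32, 32)               # toy immunofluorescence batch
loss = train_step(bf, if_img)
loss.backward()
print(loss.item())
```

Because the residual typically carries far less content than the full image, the chain can be shorter and more stable, which is the intuition behind the reported inference-speed gains.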
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.14754">arXiv:2407.14754</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.14754">pdf</a>, <a href="https://arxiv.org/format/2407.14754">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Representing Topological Self-Similarity Using Fractal Feature Maps for Accurate Segmentation of Tubular Structures </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Huang%2C+J">Jiaxing Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+Y">Yanfeng Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Luo%2C+Y">Yaoru Luo</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+G">Guole Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+H">Heng Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Ge Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.14754v1-abstract-short" style="display: inline;"> Accurate segmentation of long and thin tubular structures is required in a wide variety of areas such as biology, medicine, and remote sensing. The complex topology and geometry of such structures often pose significant technical challenges. A fundamental property of such structures is their topological self-similarity, which can be quantified by fractal features such as fractal dimension (FD). In&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.14754v1-abstract-full').style.display = 'inline'; document.getElementById('2407.14754v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.14754v1-abstract-full" style="display: none;"> Accurate segmentation of long and thin tubular structures is required in a wide variety of areas such as biology, medicine, and remote sensing. The complex topology and geometry of such structures often pose significant technical challenges. A fundamental property of such structures is their topological self-similarity, which can be quantified by fractal features such as fractal dimension (FD). In this study, we incorporate fractal features into a deep learning model by extending FD to the pixel-level using a sliding window technique. The resulting fractal feature maps (FFMs) are then incorporated as additional input to the model and additional weight in the loss function to enhance segmentation performance by utilizing the topological self-similarity. Moreover, we extend the U-Net architecture by incorporating an edge decoder and a skeleton decoder to improve boundary accuracy and skeletal continuity of segmentation, respectively. Extensive experiments on five tubular structure datasets validate the effectiveness and robustness of our approach. Furthermore, the integration of FFMs with other popular segmentation models such as HR-Net also yields performance enhancement, suggesting FFM can be incorporated as a plug-in module with different model architectures. 
Code and data are openly accessible at https://github.com/cbmi-group/FFM-Multi-Decoder-Network. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.14754v1-abstract-full').style.display = 'none'; document.getElementById('2407.14754v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.09507">arXiv:2407.09507</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.09507">pdf</a>, <a href="https://arxiv.org/format/2407.09507">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Can Generative AI Replace Immunofluorescent Staining Processes? A Comparison Study of Synthetically Generated CellPainting Images from Brightfield </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xing%2C+X">Xiaodan Xing</a>, <a href="/search/eess?searchtype=author&amp;query=Murdoch%2C+S">Siofra Murdoch</a>, <a href="/search/eess?searchtype=author&amp;query=Tang%2C+C">Chunling Tang</a>, <a href="/search/eess?searchtype=author&amp;query=Papanastasiou%2C+G">Giorgos Papanastasiou</a>, <a href="/search/eess?searchtype=author&amp;query=Cross-Zamirski%2C+J">Jan Cross-Zamirski</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+Y">Yunzhe Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+X">Xianglu Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Sch%C3%B6nlieb%2C+C">Carola-Bibiane Schönlieb</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Y">Yinhai Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.09507v2-abstract-short" style="display: inline;"> Cell imaging assays utilizing fluorescence stains are essential for observing sub-cellular organelles and their responses to perturbations. The immunofluorescent staining process is routine in labs; however, recent innovations in generative AI are challenging the idea that IF staining is required. This is especially true when the availability and cost of specific fluorescence dyes is a problem for s&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.09507v2-abstract-full').style.display = 'inline'; document.getElementById('2407.09507v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.09507v2-abstract-full" style="display: none;"> Cell imaging assays utilizing fluorescence stains are essential for observing sub-cellular organelles and their responses to perturbations. The immunofluorescent staining process is routine in labs; however, recent innovations in generative AI are challenging the idea that IF staining is required. This is especially true when the availability and cost of specific fluorescence dyes is a problem for some labs.
Furthermore, the staining process takes time, introduces inter- and intra-technician variability, and hinders downstream image and data analysis as well as the reusability of image data for other projects. Recent studies in the literature have shown that synthetic immunofluorescence (IF) images can be generated from brightfield (BF) images using generative AI algorithms. Therefore, in this study, we benchmark and compare five models from three types of IF generation backbones, CNN, GAN, and diffusion models, using a publicly available dataset. This paper not only serves as a comparative study to determine the best-performing model but also proposes a comprehensive analysis pipeline for evaluating the efficacy of generators in IF image synthesis. We highlight the potential of deep learning-based generators for IF image synthesis, while also discussing potential issues and future research directions. Although generative AI shows promise in simplifying cell phenotyping using only BF images in place of IF staining, further research and validations are needed to address the key challenges of model generalisability, batch effects, feature relevance and computational costs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.09507v2-abstract-full').style.display = 'none'; document.getElementById('2407.09507v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.08167">arXiv:2407.08167</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.08167">pdf</a>, <a href="https://arxiv.org/format/2407.08167">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DSCENet: Dynamic Screening and Clinical-Enhanced Multimodal Fusion for MPNs Subtype Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Y">Yuan Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Qi%2C+Y">Yaolei Qi</a>, <a href="/search/eess?searchtype=author&amp;query=Qi%2C+X">Xiaoming Qi</a>, <a href="/search/eess?searchtype=author&amp;query=Wei%2C+Y">Yongyue Wei</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guanyu Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.08167v1-abstract-short" style="display: inline;"> The precise subtype classification of myeloproliferative neoplasms (MPNs) based on multimodal information, which assists clinicians in diagnosis and long-term treatment plans, is of great clinical significance. However, it remains a highly challenging task due to the lack of diagnostic representativeness for local patches and the absence of diagnostic-relevant features from a single modality.
In th&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.08167v1-abstract-full').style.display = 'inline'; document.getElementById('2407.08167v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.08167v1-abstract-full" style="display: none;"> The precise subtype classification of myeloproliferative neoplasms (MPNs) based on multimodal information, which assists clinicians in diagnosis and long-term treatment plans, is of great clinical significance. However, it remains a highly challenging task due to the lack of diagnostic representativeness for local patches and the absence of diagnostic-relevant features from a single modality. In this paper, we propose a Dynamic Screening and Clinical-Enhanced Network (DSCENet) for the subtype classification of MPNs on the multimodal fusion of whole slide images (WSIs) and clinical information. (1) A dynamic screening module is proposed to flexibly adapt the feature learning of local patches, reducing the interference of irrelevant features and enhancing their diagnostic representativeness. (2) A clinical-enhanced fusion module is proposed to integrate clinical indicators to explore complementary features across modalities, providing comprehensive diagnostic information. Our approach has been validated on real clinical data, achieving an increase of 7.91% in AUC and 16.89% in accuracy compared with previous state-of-the-art (SOTA) methods. The code is available at https://github.com/yuanzhang7/DSCENet. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.08167v1-abstract-full').style.display = 'none'; document.getElementById('2407.08167v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024.
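As a rough illustration of clinical-enhanced multimodal fusion of the kind described above, WSI patch embeddings can be attention-pooled into a slide-level feature and then gated by an embedding of tabular clinical indicators. The sketch below is a generic pattern under assumed shapes and layer names, not the DSCENet code.

```python
# Generic gated fusion of WSI patch features and clinical indicators
# (illustrative sketch; all dimensions and layer names are assumptions).
import torch
import torch.nn as nn

class ClinicalEnhancedFusion(nn.Module):
    def __init__(self, feat_dim=256, n_clinical=8, n_classes=3):
        super().__init__()
        self.attn = nn.Linear(feat_dim, 1)                # patch attention scores
        self.clin = nn.Sequential(nn.Linear(n_clinical, feat_dim), nn.ReLU())
        self.gate = nn.Linear(2 * feat_dim, feat_dim)
        self.head = nn.Linear(feat_dim, n_classes)

    def forward(self, patches, clinical):
        # patches: (B, N, D) patch embeddings; clinical: (B, K) indicators.
        w = torch.softmax(self.attn(patches), dim=1)      # (B, N, 1)
        slide = (w * patches).sum(dim=1)                  # attention-pooled slide feature
        c = self.clin(clinical)                           # clinical embedding
        g = torch.sigmoid(self.gate(torch.cat([slide, c], dim=-1)))
        fused = g * slide + (1 - g) * c                   # gated multimodal fusion
        return self.head(fused)

model = ClinicalEnhancedFusion()
logits = model(torch.randn(2, 100, 256), torch.randn(2, 8))
print(logits.shape)                                       # (2, 3)
```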
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by MICCAI2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.03542">arXiv:2407.03542</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.03542">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Probing Perfection: The Relentless Art of Meddling for Pulmonary Airway Segmentation from HRCT via a Human-AI Collaboration Based Active Learning Method </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wang%2C+S">Shiyi Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Nan%2C+Y">Yang Nan</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+S">Sheng Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Felder%2C+F">Federico Felder</a>, <a href="/search/eess?searchtype=author&amp;query=Xing%2C+X">Xiaodan Xing</a>, <a href="/search/eess?searchtype=author&amp;query=Fang%2C+Y">Yingying Fang</a>, <a href="/search/eess?searchtype=author&amp;query=Del+Ser%2C+J">Javier Del Ser</a>, <a href="/search/eess?searchtype=author&amp;query=Walsh%2C+S+L+F">Simon L F Walsh</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.03542v2-abstract-short" style="display: inline;"> In pulmonary tracheal segmentation, the scarcity of annotated data is a prevalent issue in medical segmentation. Additionally, Deep Learning (DL) methods face challenges: the opacity of &#39;black box&#39; models and the need for performance enhancement. Our Human-Computer Interaction (HCI) based models (RS_UNet, LC_UNet, UUNet, and WD_UNet) address these challenges by combining diverse query strategies w&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03542v2-abstract-full').style.display = 'inline'; document.getElementById('2407.03542v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.03542v2-abstract-full" style="display: none;"> In pulmonary tracheal segmentation, the scarcity of annotated data is a prevalent issue in medical segmentation. Additionally, Deep Learning (DL) methods face challenges: the opacity of &#39;black box&#39; models and the need for performance enhancement. Our Human-Computer Interaction (HCI) based models (RS_UNet, LC_UNet, UUNet, and WD_UNet) address these challenges by combining diverse query strategies with various DL models. 
We train four HCI models and repeat these steps: (1) Query Strategy: The HCI models select samples that provide the most additional representative information when labeled in each iteration and identify unlabeled samples with the greatest predictive disparity using Wasserstein Distance, Least Confidence, Entropy Sampling, and Random Sampling. (2) Central line correction: Selected samples are used for expert correction of system-generated tracheal central lines in each training round. (3) Update training dataset: Experts update the training dataset after each DL model&#39;s training epoch, enhancing the trustworthiness and performance of the models. (4) Model training: The HCI model is trained using the updated dataset and an enhanced UNet version. Experimental results confirm the effectiveness of these HCI-based approaches, showing that WD-UNet, LC-UNet, UUNet, and RS-UNet achieve comparable or superior performance to state-of-the-art DL models. Notably, WD-UNet achieves this with only 15%-35% of the training data, reducing physician annotation time by 65%-85%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03542v2-abstract-full').style.display = 'none'; document.getElementById('2407.03542v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.19043">arXiv:2406.19043</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.19043">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> </div> </div> <p class="title is-5 mathjax"> CMRxRecon2024: A Multi-Modality, Multi-View K-Space Dataset Boosting Universal Machine Learning for Accelerated Cardiac MRI </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Z">Zi Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+F">Fanwen Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Qin%2C+C">Chen Qin</a>, <a href="/search/eess?searchtype=author&amp;query=Lyu%2C+J">Jun Lyu</a>, <a href="/search/eess?searchtype=author&amp;query=Cheng%2C+O">Ouyang Cheng</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+S">Shuo Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+Y">Yan Li</a>, <a href="/search/eess?searchtype=author&amp;query=Yu%2C+M">Mengyao Yu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+H">Haoyu Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+K">Kunyuan Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zhang Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+Q">Qirong Li</a>, <a 
href="/search/eess?searchtype=author&amp;query=Xu%2C+Z">Ziqiang Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Y">Yajing Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+H">Hao Li</a>, <a href="/search/eess?searchtype=author&amp;query=Hua%2C+S">Sha Hua</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+B">Binghua Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+L">Longyu Sun</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+M">Mengting Sun</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+Q">Qin Li</a>, <a href="/search/eess?searchtype=author&amp;query=Chu%2C+Y">Ying-Hua Chu</a>, <a href="/search/eess?searchtype=author&amp;query=Bai%2C+W">Wenjia Bai</a>, <a href="/search/eess?searchtype=author&amp;query=Qin%2C+J">Jing Qin</a>, <a href="/search/eess?searchtype=author&amp;query=Zhuang%2C+X">Xiahai Zhuang</a>, <a href="/search/eess?searchtype=author&amp;query=Prieto%2C+C">Claudia Prieto</a> , et al. (7 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.19043v1-abstract-short" style="display: inline;"> Cardiac magnetic resonance imaging (MRI) has emerged as a clinically gold-standard technique for diagnosing cardiac diseases, thanks to its ability to provide diverse information with multiple modalities and anatomical views. Accelerated cardiac MRI is highly expected to achieve time-efficient and patient-friendly imaging, and then advanced image reconstruction approaches are required to recover h&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19043v1-abstract-full').style.display = 'inline'; document.getElementById('2406.19043v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.19043v1-abstract-full" style="display: none;"> Cardiac magnetic resonance imaging (MRI) has emerged as a clinically gold-standard technique for diagnosing cardiac diseases, thanks to its ability to provide diverse information with multiple modalities and anatomical views. Accelerated cardiac MRI is highly expected to achieve time-efficient and patient-friendly imaging, and then advanced image reconstruction approaches are required to recover high-quality, clinically interpretable images from undersampled measurements. However, the lack of publicly available cardiac MRI k-space dataset in terms of both quantity and diversity has severely hindered substantial technological progress, particularly for data-driven artificial intelligence. Here, we provide a standardized, diverse, and high-quality CMRxRecon2024 dataset to facilitate the technical development, fair evaluation, and clinical transfer of cardiac MRI reconstruction approaches, towards promoting the universal frameworks that enable fast and robust reconstructions across different cardiac MRI protocols in clinical practice. To the best of our knowledge, the CMRxRecon2024 dataset is the largest and most diverse publicly available cardiac k-space dataset. It is acquired from 330 healthy volunteers, covering commonly used modalities, anatomical views, and acquisition trajectories in clinical cardiac MRI workflows. Besides, an open platform with tutorials, benchmarks, and data processing tools is provided to facilitate data usage, advanced method development, and fair performance evaluation. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19043v1-abstract-full').style.display = 'none'; document.getElementById('2406.19043v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages, 3 figures, 2 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.17173">arXiv:2406.17173</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.17173">pdf</a>, <a href="https://arxiv.org/format/2406.17173">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Diff3Dformer: Leveraging Slice Sequence Diffusion for Enhanced 3D CT Classification with Transformer Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Jin%2C+Z">Zihao Jin</a>, <a href="/search/eess?searchtype=author&amp;query=Fang%2C+Y">Yingying Fang</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+J">Jiahao Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+C">Caiwen Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Walsh%2C+S">Simon Walsh</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.17173v2-abstract-short" style="display: inline;"> The manifestation of symptoms associated with lung diseases can vary in different depths for individual patients, highlighting the significance of 3D information in CT scans for medical image classification. While Vision Transformer has shown superior performance over convolutional neural networks in image classification tasks, their effectiveness is often demonstrated on sufficiently large 2D dat&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.17173v2-abstract-full').style.display = 'inline'; document.getElementById('2406.17173v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.17173v2-abstract-full" style="display: none;"> The manifestation of symptoms associated with lung diseases can vary in different depths for individual patients, highlighting the significance of 3D information in CT scans for medical image classification. While Vision Transformer has shown superior performance over convolutional neural networks in image classification tasks, their effectiveness is often demonstrated on sufficiently large 2D datasets and they easily encounter overfitting issues on small medical image datasets. 
To address this limitation, we propose a Diffusion-based 3D Vision Transformer (Diff3Dformer), which utilizes the latent space of the Diffusion model to form the slice sequence for 3D analysis and incorporates clustering attention into ViT to aggregate repetitive information within 3D CT scans, thereby harnessing the power of the advanced transformer in 3D classification tasks on small datasets. Our method exhibits improved performance on two different scales of small datasets of 3D lung CT scans, surpassing state-of-the-art 3D methods and other transformer-based approaches that emerged during the COVID-19 pandemic, demonstrating its robust and superior performance across different scales of data. Experimental results underscore the superiority of our proposed method, indicating its potential for enhancing medical image classification tasks in real-world scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.17173v2-abstract-full').style.display = 'none'; document.getElementById('2406.17173v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.16189">arXiv:2406.16189</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.16189">pdf</a>, <a href="https://arxiv.org/format/2406.16189">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Fuzzy Attention-based Border Rendering Network for Lung Organ Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+S">Sheng Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Nan%2C+Y">Yang Nan</a>, <a href="/search/eess?searchtype=author&amp;query=Fang%2C+Y">Yingying Fang</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+S">Shiyi Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Xing%2C+X">Xiaodan Xing</a>, <a href="/search/eess?searchtype=author&amp;query=Gao%2C+Z">Zhifan Gao</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.16189v2-abstract-short" style="display: inline;"> Automatic lung organ segmentation on CT images is crucial for lung disease diagnosis. However, the unlimited voxel values and class imbalance of lung organs can lead to false-negative/positive and leakage issues in advanced methods.
Additionally, some slender lung organs are easily lost during the recycled down/up-sample procedure, e.g., bronchioles &amp; arterioles, causing severe discontinuity issue&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.16189v2-abstract-full').style.display = 'inline'; document.getElementById('2406.16189v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.16189v2-abstract-full" style="display: none;"> Automatic lung organ segmentation on CT images is crucial for lung disease diagnosis. However, the unlimited voxel values and class imbalance of lung organs can lead to false-negative/positive and leakage issues in advanced methods. Additionally, some slender lung organs are easily lost during the recycled down/up-sample procedure, e.g., bronchioles &amp; arterioles, causing severe discontinuity issue. Inspired by these, this paper introduces an effective lung organ segmentation method called Fuzzy Attention-based Border Rendering (FABR) network. Since fuzzy logic can handle the uncertainty in feature extraction, hence the fusion of deep networks and fuzzy sets should be a viable solution for better performance. Meanwhile, unlike prior top-tier methods that operate on all regular dense points, our FABR depicts lung organ regions as cube-trees, focusing only on recycle-sampled border vulnerable points, rendering the severely discontinuous, false-negative/positive organ regions with a novel Global-Local Cube-tree Fusion (GLCF) module. All experimental results, on four challenging datasets of airway &amp; artery, demonstrate that our method can achieve the favorable performance significantly. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.16189v2-abstract-full').style.display = 'none'; document.getElementById('2406.16189v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">MICCAI 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.15752">arXiv:2406.15752</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.15752">pdf</a>, <a href="https://arxiv.org/format/2406.15752">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> TacoLM: GaTed Attention Equipped Codec Language Model are Efficient Zero-Shot Text to Speech Synthesizers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Song%2C+Y">Yakun Song</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+Z">Zhuo Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+X">Xiaofei Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Ma%2C+Z">Ziyang Ma</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guanrou Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+X">Xie Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.15752v1-abstract-short" style="display: inline;"> Neural codec language model (LM) has demonstrated strong capability in zero-shot text-to-speech (TTS) synthesis. However, the codec LM often suffers from limitations in inference speed and stability, due to its auto-regressive nature and implicit alignment between text and audio. In this work, to handle these challenges, we introduce a new variant of neural codec LM, namely TacoLM. Specifically, T&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.15752v1-abstract-full').style.display = 'inline'; document.getElementById('2406.15752v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.15752v1-abstract-full" style="display: none;"> Neural codec language model (LM) has demonstrated strong capability in zero-shot text-to-speech (TTS) synthesis. However, the codec LM often suffers from limitations in inference speed and stability, due to its auto-regressive nature and implicit alignment between text and audio. In this work, to handle these challenges, we introduce a new variant of neural codec LM, namely TacoLM. Specifically, TacoLM introduces a gated attention mechanism to improve the training and inference efficiency and reduce the model size. Meanwhile, an additional gated cross-attention layer is included for each decoder layer, which improves the efficiency and content accuracy of the synthesized speech. In the evaluation of the Librispeech corpus, the proposed TacoLM achieves a better word error rate, speaker similarity, and mean opinion score, with 90% fewer parameters and 5.2 times speed up, compared with VALL-E. Demo and code is available at https://ereboas.github.io/TacoLM/. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.15752v1-abstract-full').style.display = 'none'; document.getElementById('2406.15752v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">INTERSPEECH 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.13788">arXiv:2406.13788</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.13788">pdf</a>, <a href="https://arxiv.org/format/2406.13788">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Groupwise Deformable Registration of Diffusion Tensor Cardiovascular Magnetic Resonance: Disentangling Diffusion Contrast, Respiratory and Cardiac Motions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wang%2C+F">Fanwen Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Luo%2C+Y">Yihao Luo</a>, <a href="/search/eess?searchtype=author&amp;query=Wen%2C+K">Ke Wen</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+J">Jiahao Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Ferreira%2C+P+F">Pedro F. Ferreira</a>, <a href="/search/eess?searchtype=author&amp;query=Luo%2C+Y">Yaqing Luo</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+Y">Yinzhe Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Munoz%2C+C">Camila Munoz</a>, <a href="/search/eess?searchtype=author&amp;query=Pennell%2C+D+J">Dudley J. Pennell</a>, <a href="/search/eess?searchtype=author&amp;query=Scott%2C+A+D">Andrew D. Scott</a>, <a href="/search/eess?searchtype=author&amp;query=Nielles-Vallespin%2C+S">Sonia Nielles-Vallespin</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.13788v2-abstract-short" style="display: inline;"> Diffusion tensor based cardiovascular magnetic resonance (DT-CMR) offers a non-invasive method to visualize the myocardial microstructure. With the assumption that the heart is stationary, frames are acquired with multiple repetitions for different diffusion encoding directions. However, motion from poor breath-holding and imprecise cardiac triggering complicates DT-CMR analysis, further challenge&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.13788v2-abstract-full').style.display = 'inline'; document.getElementById('2406.13788v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.13788v2-abstract-full" style="display: none;"> Diffusion tensor based cardiovascular magnetic resonance (DT-CMR) offers a non-invasive method to visualize the myocardial microstructure. 
arXiv:2406.13788 [pdf, other] (https://arxiv.org/abs/2406.13788)
Subjects: eess.SP
Groupwise Deformable Registration of Diffusion Tensor Cardiovascular Magnetic Resonance: Disentangling Diffusion Contrast, Respiratory and Cardiac Motions
Authors: Fanwen Wang, Yihao Luo, Ke Wen, Jiahao Huang, Pedro F. Ferreira, Yaqing Luo, Yinzhe Wu, Camila Munoz, Dudley J. Pennell, Andrew D. Scott, Sonia Nielles-Vallespin, Guang Yang
Abstract: Diffusion tensor based cardiovascular magnetic resonance (DT-CMR) offers a non-invasive method to visualize the myocardial microstructure. Under the assumption that the heart is stationary, frames are acquired with multiple repetitions for different diffusion encoding directions. However, motion from poor breath-holding and imprecise cardiac triggering complicates DT-CMR analysis, which is further challenged by the inherently low SNR, varied contrasts, and diffusion-induced textures. Our solution is a novel framework employing groupwise registration with an implicit template to isolate respiratory and cardiac motions, while a tensor-embedded branch preserves diffusion contrast textures. We have devised a loss refinement tailored for non-linear least-squares fitting and low-SNR conditions. Additionally, we introduce new physics-based and clinical metrics for performance evaluation. Access code and supplementary materials at: https://github.com/ayanglab/DTCMR-Reg
Submitted 3 July, 2024; v1 submitted 19 June, 2024; originally announced June 2024.
Comments: Accepted by MICCAI 2024
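The groupwise-with-implicit-template idea admits a toy illustration: the template is simply the running mean of the aligned stack, so no single frame is privileged, and frames are repeatedly registered to it. The sketch below is translation-only and 2D, using scikit-image's phase correlation; the paper's framework is deformable and tensor-aware, which this does not attempt to reproduce.

```python
# Toy groupwise registration with an implicit (mean) template.
import numpy as np
from scipy.ndimage import shift as nd_shift
from skimage.registration import phase_cross_correlation

def groupwise_align(frames, n_iters=5):
    """frames: (n, H, W) stack. Returns aligned stack and implicit template."""
    frames = frames.astype(float)
    aligned = frames.copy()
    for _ in range(n_iters):
        template = aligned.mean(axis=0)           # implicit template
        for i, frame in enumerate(frames):
            shift_est, _, _ = phase_cross_correlation(template, frame)
            aligned[i] = nd_shift(frame, shift_est)
    return aligned, aligned.mean(axis=0)
```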
arXiv:2406.13708 [pdf] (https://arxiv.org/abs/2406.13708)
Subjects: eess.IV; physics.med-ph
Low-rank based motion correction followed by automatic frame selection in DT-CMR
Authors: Fanwen Wang, Pedro F. Ferreira, Camila Munoz, Ke Wen, Yaqing Luo, Jiahao Huang, Yinzhe Wu, Dudley J. Pennell, Andrew D. Scott, Sonia Nielles-Vallespin, Guang Yang
Abstract: Motivation: Post-processing of in-vivo diffusion tensor CMR (DT-CMR) is challenging due to the low SNR and the variation in contrast between frames, which make image registration difficult, and due to the need to manually reject frames corrupted by motion. Goals: To develop a semi-automatic post-processing pipeline for robust DT-CMR registration and automatic frame selection. Approach: We used low-intrinsic-rank averaged frames as the reference to register other low-ranked frames. A myocardium-guided frame selection rejected the frames with signal loss, through-plane motion, and poor registration. Results: The proposed method outperformed our previous noise-robust rigid registration on helix angle data quality and reduced negative eigenvalues in healthy volunteers.
Submitted 19 June, 2024; originally announced June 2024.
Comments: Accepted as ISMRM 2024 Digital poster 2141
Journal ref: ISMRM 2024 Digital poster 2141
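The "low intrinsic rank averaged frames as the reference" step can be sketched compactly: stack the repeated frames as rows of a matrix, truncate the SVD, and average the low-rank result into a denoised registration reference. The rank choice and shapes below are assumptions for illustration.

```python
# Low-rank (truncated-SVD) average as a registration reference.
import numpy as np

def low_rank_reference(frames, rank=3):
    """frames: (n, H, W). Returns a denoised low-rank average reference."""
    n, H, W = frames.shape
    X = frames.reshape(n, -1)                 # each row is one frame
    U, s, Vt = np.linalg.svd(X, full_matrices=False)
    s[rank:] = 0.0                            # keep only the top components
    X_lr = (U * s) @ Vt                       # low-rank approximation
    return X_lr.mean(axis=0).reshape(H, W)    # averaged low-rank reference
```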
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted as ISMRM 2024 Digital poster 2141</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> ISMRM 2024 Digital poster 2141 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.08887">arXiv:2406.08887</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.08887">pdf</a>, <a href="https://arxiv.org/format/2406.08887">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Low-Overhead Channel Estimation via 3D Extrapolation for TDD mmWave Massive MIMO Systems Under High-Mobility Scenarios </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+B">Binggui Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+X">Xi Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Ma%2C+S">Shaodan Ma</a>, <a href="/search/eess?searchtype=author&amp;query=Gao%2C+F">Feifei Gao</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guanghua Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.08887v1-abstract-short" style="display: inline;"> In TDD mmWave massive MIMO systems, the downlink CSI can be attained through uplink channel estimation thanks to the uplink-downlink channel reciprocity. However, the channel aging issue is significant under high-mobility scenarios and thus necessitates frequent uplink channel estimation. In addition, large amounts of antennas and subcarriers lead to high-dimensional CSI matrices, aggravating the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.08887v1-abstract-full').style.display = 'inline'; document.getElementById('2406.08887v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.08887v1-abstract-full" style="display: none;"> In TDD mmWave massive MIMO systems, the downlink CSI can be attained through uplink channel estimation thanks to the uplink-downlink channel reciprocity. However, the channel aging issue is significant under high-mobility scenarios and thus necessitates frequent uplink channel estimation. In addition, large amounts of antennas and subcarriers lead to high-dimensional CSI matrices, aggravating the pilot training overhead. To systematically reduce the pilot overhead, a spatial, frequency, and temporal domain (3D) channel extrapolation framework is proposed in this paper. 
Considering the marginal effects of pilots in the spatial and frequency domains and the effectiveness of traditional knowledge-driven channel estimation methods, we first propose a knowledge-and-data driven spatial-frequency channel extrapolation network (KDD-SFCEN) for uplink channel estimation by exploiting the least square estimator for coarse channel estimation and joint spatial-frequency channel extrapolation to reduce the spatial-frequency domain pilot overhead. Then, resorting to the uplink-downlink channel reciprocity and temporal domain dependencies of downlink channels, a temporal uplink-downlink channel extrapolation network (TUDCEN) is proposed for slot-level channel extrapolation, aiming to enlarge the pilot signal period and thus reduce the temporal domain pilot overhead under high-mobility scenarios. Specifically, we propose the spatial-frequency sampling embedding module to reduce the representation dimension and consequent computational complexity, and we propose to exploit the autoregressive generative Transformer for generating downlink channels autoregressively. Numerical results demonstrate the superiority of the proposed framework in significantly reducing the pilot training overhead by more than 16 times and improving the system&#39;s spectral efficiency under high-mobility scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.08887v1-abstract-full').style.display = 'none'; document.getElementById('2406.08887v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 11 figures, 3 tables. 
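The coarse stage that KDD-SFCEN builds on is a standard least-squares pilot estimate, which a sketch makes concrete: divide the received pilot observations by the known pilot symbols, leaving extrapolation to the remaining antennas and subcarriers to the network. Dimensions and noise level below are illustrative assumptions.

```python
# Least-squares (LS) channel estimate on pilot subcarriers only.
import numpy as np

rng = np.random.default_rng(0)
n_sc, n_pilot = 256, 32                       # subcarriers, pilot tones
pilot_idx = np.linspace(0, n_sc - 1, n_pilot).astype(int)

h_true = rng.normal(size=n_sc) + 1j * rng.normal(size=n_sc)
x_pilot = np.exp(1j * 2 * np.pi * rng.random(n_pilot))   # unit-modulus pilots
noise = 0.05 * (rng.normal(size=n_pilot) + 1j * rng.normal(size=n_pilot))
y = h_true[pilot_idx] * x_pilot + noise       # received pilot observations

h_ls = y / x_pilot                            # LS estimate per pilot tone
print(np.abs(h_ls - h_true[pilot_idx]).mean())  # small residual error
```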
arXiv:2406.05839 [pdf, other] (https://arxiv.org/abs/2406.05839)
Subjects: eess.AS; cs.AI. doi: 10.21437/Interspeech.2024 (https://doi.org/10.21437/Interspeech.2024)
MaLa-ASR: Multimedia-Assisted LLM-Based ASR
Authors: Guanrou Yang, Ziyang Ma, Fan Yu, Zhifu Gao, Shiliang Zhang, Xie Chen
Abstract: As more and more information-rich data like video become available, utilizing multi-modal auxiliary information to enhance audio tasks has sparked widespread research interest. The recent surge in research on LLM-based audio models provides fresh perspectives for tackling audio tasks. Given that an LLM can flexibly ingest multiple inputs, we propose MaLa-ASR, an LLM-based ASR model that can integrate textual keywords extracted from presentation slides to improve recognition of conference content. MaLa-ASR yields average WERs of 9.4% and 11.7% on the L95 and S95 subsets of the SlideSpeech corpus, representing significant relative WER drops of 27.9% and 44.7% over the baseline model reported in SlideSpeech. MaLa-ASR underscores LLMs' strong performance in speech tasks and their capability to integrate auxiliary information conveniently. By adding keywords to the input prompt, the biased word error rate (B-WER) drops relatively by 46.0% and 44.2%, establishing a new SOTA on this dataset.
Submitted 13 June, 2024; v1 submitted 9 June, 2024; originally announced June 2024.
arXiv:2405.17659 [pdf, other] (https://arxiv.org/abs/2405.17659)
Subjects: eess.IV; cs.CV
Enhancing Global Sensitivity and Uncertainty Quantification in Medical Image Reconstruction with Monte Carlo Arbitrary-Masked Mamba
Authors: Jiahao Huang, Liutao Yang, Fanwen Wang, Yang Nan, Weiwen Wu, Chengyan Wang, Kuangyu Shi, Angelica I. Aviles-Rivero, Carola-Bibiane Schönlieb, Daoqiang Zhang, Guang Yang
Abstract: Deep learning has been extensively applied in medical image reconstruction, where Convolutional Neural Networks (CNNs) and Vision Transformers (ViTs) represent the predominant paradigms, each possessing distinct advantages and inherent limitations: CNNs exhibit linear complexity with local sensitivity, whereas ViTs demonstrate quadratic complexity with global sensitivity. The emerging Mamba has shown superiority in learning visual representations, combining the advantages of linear scalability and global sensitivity. In this study, we introduce MambaMIR, an Arbitrary-Masked Mamba-based model with wavelet decomposition for joint medical image reconstruction and uncertainty estimation. A novel Arbitrary Scan Masking (ASM) mechanism "masks out" redundant information to introduce randomness for further uncertainty estimation. Compared to the commonly used Monte Carlo (MC) dropout, our proposed MC-ASM provides an uncertainty map without the need for hyperparameter tuning and mitigates the performance drop typically observed when applying dropout to low-level tasks. For further texture preservation and better perceptual quality, we incorporate the wavelet transform into MambaMIR and explore its variant based on the Generative Adversarial Network, namely MambaMIR-GAN. Comprehensive experiments on multiple representative medical image reconstruction tasks demonstrate that the proposed MambaMIR and MambaMIR-GAN outperform other baseline and state-of-the-art methods, where MambaMIR achieves the best reconstruction fidelity and MambaMIR-GAN the best perceptual quality. In addition, our MC-ASM provides uncertainty maps as an additional tool for clinicians, while mitigating the typical performance drop caused by the commonly used dropout.
Submitted 25 June, 2024; v1 submitted 27 May, 2024; originally announced May 2024.
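MC-ASM itself is internal to MambaMIR, but the general Monte-Carlo route from random masking to an uncertainty map can be sketched: run the model several times under independent random masks and take the pixel-wise standard deviation across the reconstructions. `model` below is a stand-in identity, not the paper's network, and the masking scheme is a simplification of ASM.

```python
# Monte-Carlo masked inference -> (mean reconstruction, uncertainty map).
import numpy as np

rng = np.random.default_rng(0)

def model(masked_input):
    """Stand-in for a reconstruction network (identity for illustration)."""
    return masked_input

def mc_masked_uncertainty(image, T=20, keep_prob=0.9):
    recons = []
    for _ in range(T):
        mask = rng.random(image.shape) < keep_prob   # random binary mask
        recons.append(model(image * mask))
    recons = np.stack(recons)
    return recons.mean(axis=0), recons.std(axis=0)   # estimate + uncertainty

mean_img, unc_map = mc_masked_uncertainty(rng.random((64, 64)))
```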
arXiv:2405.15241 [pdf, other] (https://arxiv.org/abs/2405.15241)
Subjects: eess.IV; cs.CV
Blaze3DM: Marry Triplane Representation with Diffusion for 3D Medical Inverse Problem Solving
Authors: Jia He, Bonan Li, Ge Yang, Ziwen Liu
Abstract: Solving 3D medical inverse problems such as image restoration and reconstruction is crucial in the modern medical field. However, the curse of dimensionality in 3D medical data leads mainstream volume-wise methods to suffer from high resource consumption and challenges models in capturing the natural distribution, resulting in inevitable volume inconsistency and artifacts. Some recent works attempt to simplify generation in the latent space but lack the capability to efficiently model intricate image details. To address these limitations, we present Blaze3DM, a novel approach that enables fast and high-fidelity generation by integrating a compact triplane neural field and a powerful diffusion model. Blaze3DM begins by simultaneously optimizing data-dependent triplane embeddings and a shared decoder, reconstructing each triplane back to the corresponding 3D volume. To further enhance 3D consistency, we introduce a lightweight 3D-aware module to model the correlation of the three vertical planes. A diffusion model is then trained on the latent triplane embeddings and achieves both unconditional and conditional triplane generation, which is finally decoded to a volume of arbitrary size. Extensive experiments on zero-shot 3D medical inverse problem solving, including sparse-view CT, limited-angle CT, compressed-sensing MRI, and MRI isotropic super-resolution, demonstrate that Blaze3DM not only achieves state-of-the-art performance but also markedly improves computational efficiency over existing methods (22~40x faster than previous work).
Submitted 24 May, 2024; originally announced May 2024.
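The triplane representation at the core of Blaze3DM can be illustrated by its query operation: project a 3D point onto three orthogonal feature planes, sample each bilinearly, and combine the results (summation is one common choice, assumed here). Resolution and channel counts are illustrative, not the paper's settings.

```python
# Querying a triplane feature field at a 3D point.
import numpy as np
from scipy.ndimage import map_coordinates

rng = np.random.default_rng(0)
R, C = 64, 8                                   # plane resolution, channels
planes = rng.normal(size=(3, C, R, R))         # XY, XZ, YZ feature planes

def query_triplane(p):
    """p: (x, y, z) in [0, 1]^3 -> (C,) feature vector."""
    coords = [(p[0], p[1]), (p[0], p[2]), (p[1], p[2])]
    feat = np.zeros(C)
    for plane, (u, v) in zip(planes, coords):
        uv = np.array([[u * (R - 1)], [v * (R - 1)]])
        # bilinear sampling (order=1) of every channel at (u, v)
        feat += np.array([map_coordinates(ch, uv, order=1)[0] for ch in plane])
    return feat

print(query_triplane((0.3, 0.5, 0.7)).shape)   # (8,)
```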
arXiv:2405.09443 [pdf, other] (https://arxiv.org/abs/2405.09443)
Subjects: cs.IT; eess.SP
Low-Complexity Joint Azimuth-Range-Velocity Estimation for Integrated Sensing and Communication with OFDM Waveform
Authors: Jun Zhang, Gang Yang, Qibin Ye, Yixuan Huang, Su Hu
Abstract: Integrated sensing and communication (ISAC) is a main application scenario of sixth-generation mobile communication systems. Due to the fast-growing number of antennas and subcarriers in cellular systems, the computational complexity of joint azimuth-range-velocity estimation (JARVE) in ISAC systems is extremely high. This paper studies the JARVE problem for a monostatic ISAC system with an orthogonal frequency division multiplexing (OFDM) waveform, in which a base station receives the echoes of its transmitted cellular OFDM signals to sense multiple targets. The Cramer-Rao bounds for JARVE are first derived. A low-complexity algorithm is further designed for super-resolution JARVE, which utilizes the proposed iterative subspace update scheme and the Levenberg-Marquardt optimization method to replace the exhaustive spatial-spectrum search of the multiple-signal-classification (MUSIC) algorithm. Finally, with practical 5G New Radio parameters, simulation results verify that the proposed algorithm reduces the computational complexity by three orders of magnitude compared to the existing three-dimensional MUSIC algorithm and by two orders of magnitude compared to the estimation-of-signal-parameters-using-rotational-invariance-techniques (ESPRIT) algorithm, while also improving estimation performance.
Submitted 15 May, 2024; originally announced May 2024.
Comments: 16 pages, 12 figures, submitted to IEEE journal
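For contrast with the proposed low-complexity search, the exhaustive MUSIC spatial-spectrum scan that the paper replaces looks like this in a 1-D toy case (half-wavelength uniform linear array, azimuth only; all parameters are illustrative):

```python
# 1-D MUSIC: sample covariance -> noise subspace -> spectrum scan.
import numpy as np

rng = np.random.default_rng(0)
M, n_src, snapshots = 8, 2, 200               # antennas, sources, snapshots
true_deg = np.array([-20.0, 35.0])

def steer(theta_deg):
    # Steering vectors of a half-wavelength ULA, one column per angle.
    return np.exp(1j * np.pi * np.arange(M)[:, None]
                  * np.sin(np.deg2rad(theta_deg)))

A = steer(true_deg)                                        # (M, n_src)
S = rng.normal(size=(n_src, snapshots)) + 1j * rng.normal(size=(n_src, snapshots))
X = A @ S + 0.1 * (rng.normal(size=(M, snapshots))
                   + 1j * rng.normal(size=(M, snapshots)))
Rxx = X @ X.conj().T / snapshots                           # sample covariance
eigvals, eigvecs = np.linalg.eigh(Rxx)
En = eigvecs[:, :-n_src]                                   # noise subspace

grid = np.linspace(-90, 90, 721)
a = steer(grid)                                            # (M, n_grid)
P = 1.0 / np.linalg.norm(En.conj().T @ a, axis=0) ** 2     # MUSIC spectrum
print(grid[np.argmax(P)])     # strongest peak, near one of the true azimuths
```

Extending this scan to azimuth, range, and velocity multiplies the grid size in each dimension, which is exactly the cost the paper's subspace update and Levenberg-Marquardt refinement avoid.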
arXiv:2405.05030 [pdf] (https://arxiv.org/abs/2405.05030)
Subjects: eess.SY. doi: 10.5194/wes-2024-61 (https://doi.org/10.5194/wes-2024-61)
Functional Specifications and Testing Requirements of Grid-Forming Type-IV Offshore Wind Power
Authors: Sulav Ghimire, Gabriel M. G. Guerreiro, Kanakesh V. K., Emerson D. Guest, Kim H. Jensen, Guangya Yang, Xiongfei Wang
Abstract: Throughout the past few years, various transmission system operators (TSOs) and research institutes have defined several functional specifications for grid-forming (GFM) converters via grid codes, white papers, and technical documents. These institutes and organisations have also proposed testing requirements for general inverter-based resources (IBRs) and specific GFM converters. This paper first reviews functional specifications and testing requirements from several sources to build a general understanding of GFM capabilities. Furthermore, it proposes an outlook on the defined GFM capabilities, functional specifications, and testing requirements for offshore wind power plant (OF WPP) applications from an original equipment manufacturer (OEM) perspective. Finally, this paper briefly establishes the relevance of new testing methodologies for equipment-level certification and model validation, focusing on GFM functional specifications.
Submitted 8 May, 2024; originally announced May 2024.
Journal ref: WES-2024
arXiv:2404.01082 [pdf, other] (https://arxiv.org/abs/2404.01082)
Subjects: eess.IV
The state-of-the-art in Cardiac MRI Reconstruction: Results of the CMRxRecon Challenge in MICCAI 2023
Authors: Jun Lyu, Chen Qin, Shuo Wang, Fanwen Wang, Yan Li, Zi Wang, Kunyuan Guo, Cheng Ouyang, Michael Tänzer, Meng Liu, Longyu Sun, Mengting Sun, Qin Li, Zhang Shi, Sha Hua, Hao Li, Zhensen Chen, Zhenlin Zhang, Bingyu Xin, Dimitris N. Metaxas, George Yiasemis, Jonas Teuwen, Liping Zhang, Weitian Chen, Yidong Zhao, et al. (25 additional authors not shown)
Abstract: Cardiac MRI, crucial for evaluating heart structure and function, faces limitations like slow imaging and motion artifacts. Undersampling reconstruction, especially with data-driven algorithms, has emerged as a promising solution to accelerate scans and enhance imaging performance using highly under-sampled data. Nevertheless, the scarcity of publicly available cardiac k-space datasets and evaluation platforms hinders the development of data-driven reconstruction algorithms. To address this issue, we organized the Cardiac MRI Reconstruction Challenge (CMRxRecon) in 2023, in collaboration with the 26th International Conference on MICCAI. CMRxRecon presented an extensive k-space dataset comprising cine and mapping raw data, accompanied by detailed annotations of cardiac anatomical structures. With overwhelming participation, the challenge attracted more than 285 teams and over 600 participants. Among them, 22 teams successfully submitted Docker containers for the testing phase, with 7 teams submitting for both the cine and mapping tasks. All teams used deep learning based approaches, indicating that deep learning has predominantly become a promising solution for the problem. The first-place winner of both tasks utilizes the E2E-VarNet architecture as its backbone. In contrast, U-Net is still the most popular backbone for both multi-coil and single-coil reconstructions. This paper provides a comprehensive overview of the challenge design, presents a summary of the submitted results, reviews the employed methods, and offers an in-depth discussion that aims to inspire future advancements in cardiac MRI reconstruction models. The summary emphasizes the effective strategies observed in cardiac MRI reconstruction, including backbone architecture, loss function, pre-processing techniques, physical modeling, and model complexity, thereby providing valuable insights for further developments in this field.
Submitted 16 April, 2024; v1 submitted 1 April, 2024; originally announced April 2024.
Comments: 25 pages, 17 figures
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.00598v2-abstract-full').style.display = 'none'; document.getElementById('2404.00598v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.05236">arXiv:2403.05236</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.05236">pdf</a>, <a href="https://arxiv.org/format/2403.05236">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Modeling Fault Recovery and Transient Stability of Grid-Forming Converters Equipped With Current Reference Limitation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Arjomandi-Nezhad%2C+A">Ali Arjomandi-Nezhad</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+Y">Yifei Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Pal%2C+B+C">Bikash C. Pal</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guangya Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.05236v3-abstract-short" style="display: inline;"> When grid-forming (GFM) inverter-based resources (IBRs) face severe grid disturbances (e.g., short-circuit faults), the current limitation mechanism may be triggered. Consequently, the GFM IBRs enter the current-saturation mode, inducing nonlinear dynamical behaviors and posing great challenges to the post-disturbance transient angle stability. This paper presents a systematic study to reveal the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.05236v3-abstract-full').style.display = 'inline'; document.getElementById('2403.05236v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.05236v3-abstract-full" style="display: none;"> When grid-forming (GFM) inverter-based resources (IBRs) face severe grid disturbances (e.g., short-circuit faults), the current limitation mechanism may be triggered. Consequently, the GFM IBRs enter the current-saturation mode, inducing nonlinear dynamical behaviors and posing great challenges to the post-disturbance transient angle stability. This paper presents a systematic study to reveal the fault recovery behaviors of a GFM IBR and identify the risk of instability. A closed-form expression for the necessary condition that a GFM IBR returns from the current-saturation mode to the normal operation mode is presented. 
arXiv:2403.05236 [pdf, other] (https://arxiv.org/abs/2403.05236)
Subjects: eess.SY
Modeling Fault Recovery and Transient Stability of Grid-Forming Converters Equipped With Current Reference Limitation
Authors: Ali Arjomandi-Nezhad, Yifei Guo, Bikash C. Pal, Guangya Yang
Abstract: When grid-forming (GFM) inverter-based resources (IBRs) face severe grid disturbances (e.g., short-circuit faults), the current limitation mechanism may be triggered. Consequently, the GFM IBRs enter the current-saturation mode, inducing nonlinear dynamical behaviors and posing great challenges to post-disturbance transient angle stability. This paper presents a systematic study to reveal the fault recovery behaviors of a GFM IBR and identify the risk of instability. A closed-form expression for the necessary condition under which a GFM IBR returns from the current-saturation mode to the normal operation mode is presented. Based on these analyses, it is inferred that the angle of the magnitude-saturated current significantly affects post-fault recovery and transient stability; with different angle selections, the system may follow one of multiple post-fault trajectories: 1) convergence to a normal stable equilibrium point (SEP), 2) convergence to a saturated stable equilibrium point (satSEP), or 3) divergence (instability). The circumstances under which a GFM IBR cannot escape from the current-saturation mode are thoroughly investigated, and the theoretical analyses are verified by dynamic simulations.
Submitted 1 October, 2024; v1 submitted 8 March, 2024; originally announced March 2024.
Comments: 13 pages, 22 figures
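The role of the saturated-current angle is easy to see in a minimal magnitude limiter on a complex current reference: the `sat_angle` knob below mirrors the abstract's point that the angle imposed during saturation steers which post-fault trajectory the system follows. This is purely illustrative, not the paper's controller.

```python
# Magnitude limiter on a complex (phasor) current reference.
import numpy as np

def limit_current(i_ref, i_max, sat_angle=None):
    """i_ref: complex current reference; sat_angle: optional angle (rad)
    to impose on the saturated current instead of keeping angle(i_ref)."""
    if abs(i_ref) <= i_max:
        return i_ref                            # normal (unsaturated) mode
    angle = np.angle(i_ref) if sat_angle is None else sat_angle
    return i_max * np.exp(1j * angle)           # current-saturation mode

print(limit_current(1.5 + 1.0j, 1.2))           # clipped to |i| = 1.2
```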
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.01093">arXiv:2403.01093</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.01093">pdf</a>, <a href="https://arxiv.org/format/2403.01093">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Variational Bayesian Learning Based Localization and Channel Reconstruction in RIS-aided Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Li%2C+Y">Yunfei Li</a>, <a href="/search/eess?searchtype=author&amp;query=Luo%2C+Y">Yiting Luo</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+X">Xianda Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+Z">Zheng Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Ma%2C+S">Shaodan Ma</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guanghua Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> The emerging immersive and autonomous services have posed stringent requirements on both communications and localization. By considering the great potential of reconfigurable intelligent surface (RIS), this paper focuses on joint channel estimation and localization for RIS-aided wireless systems. As opposed to existing works that treat channel estimation and localization independently, this paper exploits the intrinsic coupling and nonlinear relationships between the channel parameters and user location to enhance both localization and channel reconstruction. By noticing the non-convex, nonlinear objective function and the sparse angle pattern, a variational Bayesian learning-based framework is developed to jointly estimate the channel parameters and user location through leveraging an effective approximation of the posterior distribution. The proposed framework is capable of unifying near-field and far-field scenarios owing to exploitation of sparsity of the angular domain. Since the joint channel and location estimation problem has a closed-form solution in each iteration, our proposed iterative algorithm performs better than the conventional particle swarm optimization (PSO) and maximum likelihood (ML) based ones in terms of computational complexity. Simulations demonstrate that the proposed algorithm almost reaches the Bayesian Cramer-Rao bound (BCRB) and achieves superior estimation accuracy compared to the PSO and ML algorithms. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.18451">arXiv:2402.18451</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.18451">pdf</a>, <a href="https://arxiv.org/format/2402.18451">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MambaMIR: An Arbitrary-Masked Mamba for Joint Medical Image Reconstruction and Uncertainty Estimation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Huang%2C+J">Jiahao Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+L">Liutao Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+F">Fanwen Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Nan%2C+Y">Yang Nan</a>, <a href="/search/eess?searchtype=author&amp;query=Aviles-Rivero%2C+A+I">Angelica I. Aviles-Rivero</a>, <a href="/search/eess?searchtype=author&amp;query=Sch%C3%B6nlieb%2C+C">Carola-Bibiane Schönlieb</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+D">Daoqiang Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> The recent Mamba model has shown remarkable adaptability for visual representation learning, including in medical imaging tasks. This study introduces MambaMIR, a Mamba-based model for medical image reconstruction, as well as its Generative Adversarial Network-based variant, MambaMIR-GAN. Our proposed MambaMIR inherits several advantages, such as linear complexity, global receptive fields, and dynamic weights, from the original Mamba model. The innovative arbitrary-mask mechanism effectively adapts Mamba to our image reconstruction task, providing randomness for subsequent Monte Carlo-based uncertainty estimation. Experiments conducted on various medical image reconstruction tasks, including fast MRI and sparse-view CT (SVCT), covering anatomical regions such as the knee, chest, and abdomen, have demonstrated that MambaMIR and MambaMIR-GAN achieve comparable or superior reconstruction results relative to state-of-the-art methods. Additionally, the estimated uncertainty maps offer further insights into the reliability of the reconstruction quality. The code is publicly available at https://github.com/ayanglab/MambaMIR. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p>
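<p class="is-size-7">The Monte Carlo idea in this abstract is generic enough to sketch: because the arbitrary-mask mechanism injects randomness, running inference repeatedly with fresh random masks yields an ensemble whose pixel-wise spread serves as an uncertainty map. In the sketch below, <code>reconstruct</code> is a stand-in for MambaMIR (not its API), and the mask ratio is an assumption.</p> <pre><code class="language-python">
import numpy as np

rng = np.random.default_rng(42)

def reconstruct(image, mask):
    # Placeholder network: a masked fill-in, purely for illustration.
    out = image.copy()
    out[mask] = image.mean()
    return out

def mc_uncertainty(image, n_samples=32, mask_ratio=0.3):
    """Repeat stochastic masked inference; return mean and std maps."""
    samples = []
    for _ in range(n_samples):
        mask = rng.random(image.shape) > (1.0 - mask_ratio)  # fresh random mask
        samples.append(reconstruct(image, mask))
    stack = np.stack(samples)
    return stack.mean(axis=0), stack.std(axis=0)  # reconstruction, uncertainty

mean_img, unc_map = mc_uncertainty(rng.normal(size=(64, 64)))
print(unc_map.max())
</code></pre>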
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.15939">arXiv:2402.15939</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.15939">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Deep Separable Spatiotemporal Learning for Fast Dynamic Cardiac MRI </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Z">Zi Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Xiao%2C+M">Min Xiao</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+Y">Yirong Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+C">Chengyan Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+N">Naiming Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+Y">Yi Li</a>, <a href="/search/eess?searchtype=author&amp;query=Gong%2C+Y">Yiwen Gong</a>, <a href="/search/eess?searchtype=author&amp;query=Chang%2C+S">Shufu Chang</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+Y">Yinyin Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+L">Liuhong Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+J">Jianjun Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Cai%2C+C">Congbo Cai</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+H">He Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+D">Di Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guang Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Qu%2C+X">Xiaobo Qu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Dynamic magnetic resonance imaging (MRI) plays an indispensable role in cardiac diagnosis. To enable fast imaging, the k-space data can be undersampled, but the image reconstruction poses a great challenge of high-dimensional processing. This challenge necessitates extensive training data in deep learning reconstruction methods. In this work, we propose a novel and efficient approach, leveraging a dimension-reduced separable learning scheme that can perform exceptionally well even with highly limited training data. We design this new approach by incorporating spatiotemporal priors into the development of a Deep Separable Spatiotemporal Learning network (DeepSSL), which unrolls an iteration process of a 2D spatiotemporal reconstruction model with both temporal low-rankness and spatial sparsity. Intermediate outputs can also be visualized to provide insights into the network behavior and enhance interpretability. Extensive results on cardiac cine datasets demonstrate that the proposed DeepSSL surpasses state-of-the-art methods both visually and quantitatively, while reducing the demand for training cases by up to 75%. Additionally, its preliminary adaptability to unseen cardiac patients has been verified through a blind reader study conducted by experienced radiologists and cardiologists. Furthermore, DeepSSL enhances the accuracy of the downstream task of cardiac segmentation and exhibits robustness in prospectively undersampled real-time cardiac MRI. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 14 figures, 4 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.14317">arXiv:2402.14317</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.14317">pdf</a>, <a href="https://arxiv.org/format/2402.14317">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a href="https://doi.org/10.1109/PESGM51994.2024.10689212">10.1109/PESGM51994.2024.10689212</a></span> </div> </div> </div> <p class="title is-5 mathjax"> Oscillations between Grid-Forming Converters in Weakly Connected Offshore WPPs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Ghimire%2C+S">Sulav Ghimire</a>, <a href="/search/eess?searchtype=author&amp;query=Kkuni%2C+K+V">Kanakesh V. Kkuni</a>, <a href="/search/eess?searchtype=author&amp;query=Guerreiro%2C+G+M+G">Gabriel M. G. Guerreiro</a>, <a href="/search/eess?searchtype=author&amp;query=Guest%2C+E+D">Emerson D. Guest</a>, <a href="/search/eess?searchtype=author&amp;query=Jensen%2C+K+H">Kim H. Jensen</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guangya Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> This paper studies control interactions between grid-forming (GFM) converters, exhibited as power and frequency oscillations in a weakly connected offshore wind power plant (WPP). Two GFM controls are considered, namely virtual synchronous machine (VSM) and virtual admittance (VAdm) based GFM. The GFM control methods are implemented in wind turbine generators (WTGs) of a verified aggregated model of a WPP, and the control interaction between these GFM WTGs is studied for several cases: cases with the same GFM control methods, and cases with different GFM control methods. A sensitivity analysis is performed for the observed oscillations to understand which system parameters affect the oscillations the most. Several solution methods are proposed, and the inapplicability of some of the conventional solution methods is elaborated in this paper. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> PESGM51994.2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.10776">arXiv:2402.10776</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.10776">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a href="https://doi.org/10.1109/ACCESS.2019.2904788">10.1109/ACCESS.2019.2904788</a></span> </div> </div> </div> <p class="title is-5 mathjax"> In-Vivo Hyperspectral Human Brain Image Database for Brain Cancer Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Fabelo%2C+H">H. Fabelo</a>, <a href="/search/eess?searchtype=author&amp;query=Ortega%2C+S">S. Ortega</a>, <a href="/search/eess?searchtype=author&amp;query=Szolna%2C+A">A. Szolna</a>, <a href="/search/eess?searchtype=author&amp;query=Bulters%2C+D">D. Bulters</a>, <a href="/search/eess?searchtype=author&amp;query=Pineiro%2C+J+F">J. F. Pineiro</a>, <a href="/search/eess?searchtype=author&amp;query=Kabwama%2C+S">S. Kabwama</a>, <a href="/search/eess?searchtype=author&amp;query=Shanahan%2C+A">A. Shanahan</a>, <a href="/search/eess?searchtype=author&amp;query=Bulstrode%2C+H">H. Bulstrode</a>, <a href="/search/eess?searchtype=author&amp;query=Bisshopp%2C+S">S. Bisshopp</a>, <a href="/search/eess?searchtype=author&amp;query=Kiran%2C+B+R">B. R. Kiran</a>, <a href="/search/eess?searchtype=author&amp;query=Ravi%2C+D">D. Ravi</a>, <a href="/search/eess?searchtype=author&amp;query=Lazcano%2C+R">R. Lazcano</a>, <a href="/search/eess?searchtype=author&amp;query=Madronal%2C+D">D. Madronal</a>, <a href="/search/eess?searchtype=author&amp;query=Sosa%2C+C">C. Sosa</a>, <a href="/search/eess?searchtype=author&amp;query=Espino%2C+C">C. Espino</a>, <a href="/search/eess?searchtype=author&amp;query=Marquez%2C+M">M. Marquez</a>, <a href="/search/eess?searchtype=author&amp;query=Plaza%2C+M+D+l+L">M. De la Luz Plaza</a>, <a href="/search/eess?searchtype=author&amp;query=Camacho%2C+R">R. Camacho</a>, <a href="/search/eess?searchtype=author&amp;query=Carrera%2C+D">D. Carrera</a>, <a href="/search/eess?searchtype=author&amp;query=Hernandez%2C+M">M. Hernandez</a>, <a href="/search/eess?searchtype=author&amp;query=Callico%2C+G+M">G. M. Callico</a>, <a href="/search/eess?searchtype=author&amp;query=Morera%2C+J">J. Morera</a>, <a href="/search/eess?searchtype=author&amp;query=Stanciulescu%2C+B">B. Stanciulescu</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G+Z">G. Z. Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Salvador%2C+R">R. Salvador</a> , et al. (3 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> The use of hyperspectral imaging for medical applications has become more common in recent years. One of the main obstacles researchers face when developing hyperspectral algorithms for medical applications is the lack of specific, publicly available hyperspectral medical data. The work described in this paper was developed within the framework of the European project HELICoiD (HypErspectraL Imaging Cancer Detection), whose main goal was the application of hyperspectral imaging to the delineation of brain tumors in real-time during neurosurgical operations. In this paper, the methodology followed to generate the first hyperspectral database of in-vivo human brain tissues is presented. Data were acquired employing a customized hyperspectral acquisition system capable of capturing information in the Visible and Near InfraRed (VNIR) range from 400 to 1000 nm. Repeatability was assessed for the cases where two images of the same scene were captured consecutively. The analysis reveals that the system works more efficiently in the spectral range between 450 and 900 nm. A total of 36 hyperspectral images from 22 different patients were obtained. From these data, more than 300,000 spectral signatures were labeled employing a semi-automatic methodology based on the spectral angle mapper algorithm. Four different classes were defined: normal tissue, tumor tissue, blood vessel, and background elements. All the hyperspectral data have been made available in a public repository. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages, 12 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Access, 2019, 7, pp. 39098-39116 </p>
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.08846">arXiv:2402.08846</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.08846">pdf</a>, <a href="https://arxiv.org/format/2402.08846">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> An Embarrassingly Simple Approach for LLM with Strong ASR Capacity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Ma%2C+Z">Ziyang Ma</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guanrou Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+Y">Yifan Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Gao%2C+Z">Zhifu Gao</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+J">Jiaming Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Du%2C+Z">Zhihao Du</a>, <a href="/search/eess?searchtype=author&amp;query=Yu%2C+F">Fan Yu</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+Q">Qian Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Zheng%2C+S">Siqi Zheng</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+S">Shiliang Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+X">Xie Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> In this paper, we focus on solving one of the most important tasks in the field of speech processing, i.e., automatic speech recognition (ASR), with speech foundation encoders and large language models (LLM). Recent works have complex designs such as compressing the output temporally for the speech encoder, tackling modal alignment for the projector, and utilizing parameter-efficient fine-tuning for the LLM. We found that delicate designs are not necessary; an embarrassingly simple composition of an off-the-shelf speech encoder, an LLM, and a single trainable linear projector is competent for the ASR task. To be more specific, we benchmark and explore various combinations of LLMs and speech encoders, leading to the optimal LLM-based ASR system, which we call SLAM-ASR. The proposed SLAM-ASR provides a clean setup with little task-specific design, where only the linear projector is trained. To the best of our knowledge, SLAM-ASR achieves the best performance on the Librispeech benchmark among LLM-based ASR models and even outperforms the latest LLM-based audio-universal model trained on massive paired data. Finally, we explore the capability emergence of LLM-based ASR in the process of modal alignment. We hope that our study can facilitate research on extending LLMs with cross-modality capacity and shed light on the LLM-based ASR community. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Work in progress; will open-source soon</span> </p>
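<p class="is-size-7">The architecture this abstract describes is easy to picture: a frozen speech encoder feeds a frozen LLM through one trainable linear layer. The PyTorch sketch below is an assumption-laden toy, not the released SLAM-ASR code; the class name, dimensions, and the frame-stacking factor of 5 are all made up for illustration.</p> <pre><code class="language-python">
import torch
import torch.nn as nn

class LinearProjector(nn.Module):
    """The only trainable piece: maps stacked encoder frames to LLM embeddings."""

    def __init__(self, enc_dim=1280, llm_dim=4096, downsample=5):
        super().__init__()
        self.downsample = downsample          # stack frames to shorten the sequence
        self.proj = nn.Linear(enc_dim * downsample, llm_dim)

    def forward(self, feats):                 # feats: (batch, time, enc_dim)
        b, t, d = feats.shape
        t = t - t % self.downsample           # drop ragged tail frames
        feats = feats[:, :t].reshape(b, t // self.downsample, d * self.downsample)
        return self.proj(feats)               # (batch, time // downsample, llm_dim)

projector = LinearProjector()
speech_feats = torch.randn(2, 100, 1280)      # stand-in for frozen encoder output
print(projector(speech_feats).shape)          # torch.Size([2, 20, 4096])
# Training would update only `projector`; encoder and LLM stay frozen.
</code></pre>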
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.07403">arXiv:2402.07403</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.07403">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Make it more specific: A novel uncertainty based airway segmentation application on 3D U-Net and its variants </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wang%2C+S">Shiyi Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Nan%2C+Y">Yang Nan</a>, <a href="/search/eess?searchtype=author&amp;query=N%2C+F+F">Felder Federico N</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+S">Sheng Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=F%2C+W+S+L">Walsh Simon L F</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Each medical segmentation task should be considered with a specific AI algorithm based on its scenario so that the most accurate prediction model can be obtained. The most popular algorithms in medical segmentation, 3D U-Net and its variants, can directly implement the task of lung trachea segmentation, but their failure to consider the special tree-like structure of the trachea suggests that there is much room for improvement in segmentation accuracy. A research gap therefore exists, because a large number of state-of-the-art DL algorithms are vanilla 3D U-Net structures that do not introduce performance-enhancing modules suited to the special image modality of lung airway segmentation. In this paper, we propose two network structures, Branch-Level U-Net (B-UNet) and Branch-Level CE-UNet (B-CE-UNet), which are based on the U-Net structure, and compare their prediction results on the same dataset. Specifically, both networks add a branch loss and a central line loss to learn the features of fine branch endings of the airways. Uncertainty estimation algorithms are also included to attain confident predictions and thereby increase the overall trustworthiness of the whole model. In addition, predictions of the lung trachea based on the maximum connectivity rate were calculated and extracted during post-processing for segmentation refinement and pruning. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p>
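<p class="is-size-7">The connectivity-based post-processing mentioned at the end of this abstract is a common pruning step: keep only the largest connected component of the binary airway prediction and discard spurious islands. A minimal scipy-based sketch is shown below; the threshold and array shapes are illustrative, and this is not the paper's exact pipeline.</p> <pre><code class="language-python">
import numpy as np
from scipy import ndimage

def largest_component(binary_pred):
    """Keep only the largest face-connected component of a 3D binary mask."""
    labels, n = ndimage.label(binary_pred)
    if n == 0:
        return binary_pred
    sizes = ndimage.sum(binary_pred, labels, index=range(1, n + 1))
    keep = 1 + int(np.argmax(sizes))       # label id of the biggest component
    return labels == keep

rng = np.random.default_rng(3)
pred = rng.random((32, 32, 32)) > 0.8      # stand-in for a network's binary output
print(pred.sum(), largest_component(pred).sum())
</code></pre>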
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.07192">arXiv:2402.07192</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.07192">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a href="https://doi.org/10.1371/journal.pone.0193721">10.1371/journal.pone.0193721</a></span> </div> </div> </div> <p class="title is-5 mathjax"> Spatio-spectral classification of hyperspectral images for brain cancer detection during surgical operations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Fabelo%2C+H">H. Fabelo</a>, <a href="/search/eess?searchtype=author&amp;query=Ortega%2C+S">S. Ortega</a>, <a href="/search/eess?searchtype=author&amp;query=Ravi%2C+D">D. Ravi</a>, <a href="/search/eess?searchtype=author&amp;query=Kiran%2C+B+R">B. R. Kiran</a>, <a href="/search/eess?searchtype=author&amp;query=Sosa%2C+C">C. Sosa</a>, <a href="/search/eess?searchtype=author&amp;query=Bulters%2C+D">D. Bulters</a>, <a href="/search/eess?searchtype=author&amp;query=Callico%2C+G+M">G. M. Callico</a>, <a href="/search/eess?searchtype=author&amp;query=Bulstrode%2C+H">H. Bulstrode</a>, <a href="/search/eess?searchtype=author&amp;query=Szolna%2C+A">A. Szolna</a>, <a href="/search/eess?searchtype=author&amp;query=Pineiro%2C+J+F">J. F. Pineiro</a>, <a href="/search/eess?searchtype=author&amp;query=Kabwama%2C+S">S. Kabwama</a>, <a href="/search/eess?searchtype=author&amp;query=Madronal%2C+D">D. Madronal</a>, <a href="/search/eess?searchtype=author&amp;query=Lazcano%2C+R">R. Lazcano</a>, <a href="/search/eess?searchtype=author&amp;query=OShanahan%2C+A+J">A. J. OShanahan</a>, <a href="/search/eess?searchtype=author&amp;query=Bisshopp%2C+S">S. Bisshopp</a>, <a href="/search/eess?searchtype=author&amp;query=Hernandez%2C+M">M. Hernandez</a>, <a href="/search/eess?searchtype=author&amp;query=Baez-Quevedo%2C+A">A. Baez-Quevedo</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G+Z">G. Z. Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Stanciulescu%2C+B">B. Stanciulescu</a>, <a href="/search/eess?searchtype=author&amp;query=Salvador%2C+R">R. Salvador</a>, <a href="/search/eess?searchtype=author&amp;query=Juarez%2C+E">E. Juarez</a>, <a href="/search/eess?searchtype=author&amp;query=Sarmiento%2C+R">R. Sarmiento</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Surgery for brain cancer is a major problem in neurosurgery. The diffuse infiltration of these tumors into the surrounding normal brain makes their accurate identification by the naked eye difficult. Since surgery is the common treatment for brain cancer, an accurate radical resection of the tumor leads to improved survival rates for patients. However, the identification of the tumor boundaries during surgery is challenging. Hyperspectral imaging is a noncontact, non-ionizing and non-invasive technique suitable for medical diagnosis. This study presents the development of a novel classification method that takes into account the spatial and spectral characteristics of hyperspectral images to help neurosurgeons accurately determine the tumor boundaries during resection, avoiding excessive excision of normal tissue or unintentionally leaving residual tumor. The algorithm proposed in this study consists of a hybrid framework that combines both supervised and unsupervised machine learning methods. To evaluate the proposed approach, five hyperspectral images of the surface of the brain affected by glioblastoma tumor, acquired in vivo from five different patients, have been used. The final classification maps obtained have been analyzed and validated by specialists. These preliminary results are promising, obtaining an accurate delineation of the tumor area. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.03473">arXiv:2402.03473</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.03473">pdf</a>, <a href="https://arxiv.org/format/2402.03473">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Assessing the Efficacy of Invisible Watermarks in AI-Generated Medical Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xing%2C+X">Xiaodan Xing</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+H">Huiyu Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Fang%2C+Y">Yingying Fang</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> AI-generated medical images are gaining growing popularity due to their potential to address the data scarcity challenge in the real world. However, the accurate identification of these synthetic images, particularly when they exhibit remarkable realism comparable to their real counterparts, remains a concern. To mitigate this challenge, image generators such as DALLE and Imagen have integrated digital watermarks aimed at facilitating the discernment of synthetic images' authenticity. These watermarks are embedded within the image pixels and are invisible to the human eye while remaining detectable. Nevertheless, a comprehensive investigation into the potential impact of these invisible watermarks on the utility of synthetic medical images has been lacking. In this study, we propose the incorporation of invisible watermarks into synthetic medical images and seek to evaluate their efficacy in the context of downstream classification tasks. Our goal is to pave the way for discussions on the viability of such watermarks in boosting the detectability of synthetic medical images, fortifying ethical standards, and safeguarding against data pollution and potential scams. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> ISBI 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.16564">arXiv:2401.16564</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.16564">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a href="https://doi.org/10.1109/RBME.2024.3485022">10.1109/RBME.2024.3485022</a></span> </div> </div> </div> <p class="title is-5 mathjax"> Data and Physics driven Deep Learning Models for Fast MRI Reconstruction: Fundamentals and Methodologies </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Huang%2C+J">Jiahao Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+Y">Yinzhe Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+F">Fanwen Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Fang%2C+Y">Yingying Fang</a>, <a href="/search/eess?searchtype=author&amp;query=Nan%2C+Y">Yang Nan</a>, <a href="/search/eess?searchtype=author&amp;query=Alkan%2C+C">Cagan Alkan</a>, <a href="/search/eess?searchtype=author&amp;query=Abraham%2C+D">Daniel Abraham</a>, <a href="/search/eess?searchtype=author&amp;query=Liao%2C+C">Congyu Liao</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+L">Lei Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Gao%2C+Z">Zhifan Gao</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+W">Weiwen Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+L">Lei Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+Z">Zhaolin Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Lally%2C+P">Peter Lally</a>, <a href="/search/eess?searchtype=author&amp;query=Bangerter%2C+N">Neal Bangerter</a>, <a href="/search/eess?searchtype=author&amp;query=Setsompop%2C+K">Kawin Setsompop</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+Y">Yike Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Rueckert%2C+D">Daniel Rueckert</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+G">Ge Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Guang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Magnetic Resonance Imaging (MRI) is a pivotal clinical diagnostic tool, yet its extended scanning times often compromise patient comfort and image quality, especially in volumetric, temporal and quantitative scans. This review elucidates recent advances in MRI acceleration via data and physics-driven models, leveraging techniques from algorithm unrolling models, enhancement-based methods, and plug-and-play models to the emerging full spectrum of generative model-based methods. We also explore the synergistic integration of data models with physics-based insights, encompassing the advancements in multi-coil hardware accelerations like parallel imaging and simultaneous multi-slice imaging, and the optimization of sampling patterns. We then focus on domain-specific challenges and opportunities, including image redundancy exploitation, image integrity, evaluation metrics, data heterogeneity, and model generalization. This work also discusses potential solutions and future research directions, with an emphasis on the role of data harmonization and federated learning for further improving the general applicability and performance of these methods in MRI reconstruction. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by IEEE Reviews in Biomedical Engineering (RBME)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.13564">arXiv:2401.13564</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.13564">pdf</a>, <a href="https://arxiv.org/ps/2401.13564">ps</a>, <a href="https://arxiv.org/format/2401.13564">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a href="https://doi.org/10.1109/TWC.2024.3430328">10.1109/TWC.2024.3430328</a></span> </div> </div> </div> <p class="title is-5 mathjax"> RIS Empowered Near-Field Covert Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Liu%2C+J">Jun Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+G">Gang Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+Y">Yuanwei Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+X">Xiangyun Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> This paper studies an extremely large-scale reconfigurable intelligent surface (XL-RIS) empowered covert communication system in the near-field region. Alice covertly transmits messages to Bob with the assistance of the XL-RIS, while evading detection by Willie. To enhance the covert communication performance, we maximize the achievable covert rate by jointly optimizing the hybrid analog and digital beamformers at Alice, as well as the reflection coefficient matrix at the XL-RIS. An alternating optimization algorithm is proposed to solve the joint beamforming design problem. For the hybrid beamformer design, a semi-closed-form solution for the fully digital beamformer is first obtained by a weighted minimum mean-square error based algorithm; then the baseband digital and analog beamformers at Alice are designed by approximating the fully digital beamformer via manifold optimization. For the XL-RIS's reflection coefficient matrix design, a low-complexity alternating direction method of multipliers based algorithm is proposed to address the challenge of large-scale variables and unit-modulus constraints. Numerical results unveil that i) near-field communications can generally achieve a higher covert rate than far-field covert communications, and can still realize covert transmission even if Willie is located in the same direction as Bob and closer to the XL-RIS; ii) the proposed algorithm enhances the covert rate significantly compared to the benchmark schemes; iii) the proposed algorithm leads to a beam diffraction pattern that can bypass Willie and achieve high-rate covert transmission to Bob. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 8 figures, published online in IEEE Transactions on Wireless Communications on 25 July 2024 (IEEE Xplore link: https://ieeexplore.ieee.org/document/10609798)</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Wireless Communications (25 July 2024) </p>
For the XL-RIS&#39;s reflection coefficient matrix design, a low-complexity alternating direction method of multipliers based algorithm is proposed to address the challenge of large-scale variables and unit-modulus constraints. Numerical results unveil that i) the near-field communications can achieve a higher covert rate than the far-field covert communications in general, and still realize covert transmission even if Willie is located at the same direction as Bob and closer to the XL-RIS; ii) the proposed algorithm can enhance the covert rate significantly compared to the benchmark schemes; iii) the proposed algorithm leads to a beam diffraction pattern that can bypass Willie and achieve high-rate covert transmission to Bob. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.13564v2-abstract-full').style.display = 'none'; document.getElementById('2401.13564v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 8 figures, online published in IEEE Transactions on Wireless Communications on 25 July 2024 (IEEE Xplore link: https://ieeexplore.ieee.org/document/10609798)</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Wireless Communications (25 July 2024) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.13752">arXiv:2312.13752</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2312.13752">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.media.2024.103253">10.1016/j.media.2024.103253 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Hunting imaging biomarkers in pulmonary fibrosis: Benchmarks of the AIIB23 challenge </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Nan%2C+Y">Yang Nan</a>, <a href="/search/eess?searchtype=author&amp;query=Xing%2C+X">Xiaodan Xing</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+S">Shiyi Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Tang%2C+Z">Zeyu Tang</a>, <a href="/search/eess?searchtype=author&amp;query=Felder%2C+F+N">Federico N Felder</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+S">Sheng Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Ledda%2C+R+E">Roberta Eufrasia Ledda</a>, <a 
href="/search/eess?searchtype=author&amp;query=Ding%2C+X">Xiaoliu Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Yu%2C+R">Ruiqi Yu</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+W">Weiping Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Shi%2C+F">Feng Shi</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+T">Tianyang Sun</a>, <a href="/search/eess?searchtype=author&amp;query=Cao%2C+Z">Zehong Cao</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+M">Minghui Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Gu%2C+Y">Yun Gu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+H">Hanxiao Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Gao%2C+J">Jian Gao</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+P">Pingyu Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Tang%2C+W">Wen Tang</a>, <a href="/search/eess?searchtype=author&amp;query=Yu%2C+P">Pengxin Yu</a>, <a href="/search/eess?searchtype=author&amp;query=Kang%2C+H">Han Kang</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+J">Junqiang Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Lu%2C+X">Xing Lu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+B">Boyu Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Mamalakis%2C+M">Michail Mamalakis</a> , et al. (16 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.13752v2-abstract-short" style="display: inline;"> Airway-related quantitative imaging biomarkers are crucial for examination, diagnosis, and prognosis in pulmonary diseases. However, the manual delineation of airway trees remains prohibitively time-consuming. While significant efforts have been made towards enhancing airway modelling, current public-available datasets concentrate on lung diseases with moderate morphological variations. The intric&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.13752v2-abstract-full').style.display = 'inline'; document.getElementById('2312.13752v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.13752v2-abstract-full" style="display: none;"> Airway-related quantitative imaging biomarkers are crucial for examination, diagnosis, and prognosis in pulmonary diseases. However, the manual delineation of airway trees remains prohibitively time-consuming. While significant efforts have been made towards enhancing airway modelling, current public-available datasets concentrate on lung diseases with moderate morphological variations. The intricate honeycombing patterns present in the lung tissues of fibrotic lung disease patients exacerbate the challenges, often leading to various prediction errors. To address this issue, the &#39;Airway-Informed Quantitative CT Imaging Biomarker for Fibrotic Lung Disease 2023&#39; (AIIB23) competition was organized in conjunction with the official 2023 International Conference on Medical Image Computing and Computer Assisted Intervention (MICCAI). The airway structures were meticulously annotated by three experienced radiologists. Competitors were encouraged to develop automatic airway segmentation models with high robustness and generalization abilities, followed by exploring the most correlated QIB of mortality prediction. 
Submitted 16 April, 2024; v1 submitted 21 December, 2023; originally announced December 2023.
Comments: 19 pages
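The abstract credits a voxel-wise weighted general union loss and a continuity loss for the gains, without defining either. As a hedged sketch of just the voxel-weighting idea, here is a generic weighted soft-union (Jaccard-style) loss in NumPy; the weighting rule is a hypothetical stand-in, and the actual AIIB23 losses may differ in form.

```python
import numpy as np

def weighted_soft_union_loss(pred, target, weights, eps=1e-8):
    # Soft intersection over soft union with per-voxel weights; pred in [0, 1].
    inter = np.sum(weights * pred * target)
    union = np.sum(weights * (pred + target - pred * target))
    return 1.0 - inter / (union + eps)

# Toy 3D example: up-weight foreground voxels (a hypothetical stand-in for a
# real scheme, e.g. distance- or branch-radius-based weights on small airways).
rng = np.random.default_rng(1)
target = (rng.random((8, 8, 8)) > 0.7).astype(float)   # placeholder airway mask
pred = np.clip(target + 0.1 * rng.normal(size=target.shape), 0.0, 1.0)
weights = 1.0 + 4.0 * target                            # hypothetical weighting
print(weighted_soft_union_loss(pred, target, weights))
```

A continuity loss would additionally penalize predictions that fragment the airway tree into disconnected components; since the abstract gives no formula, none is attempted here.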
arXiv:2312.13154 (https://arxiv.org/abs/2312.13154) [pdf, other]
Subjects: eess.SP (Signal Processing)

Joint Range-Velocity-Azimuth Estimation for OFDM-Based Integrated Sensing and Communication

Authors: Zelin Hu, Qibin Ye, Yixuan Huang, Su Hu, Gang Yang

Abstract: Orthogonal frequency division multiplexing (OFDM)-based integrated sensing and communication (ISAC) is promising for future sixth-generation mobile communication systems. Existing works focus on the joint estimation of the targets' range and velocity for OFDM-based ISAC systems. In contrast, this paper studies the three-dimensional joint estimation (3DJE) of range, velocity, and azimuth for OFDM-based ISAC systems with multiple receive antennas. First, we establish the signal model and derive the Cramer-Rao bounds (CRBs) on the 3DJE. Furthermore, an auto-paired super-resolution 3DJE algorithm is proposed by exploiting the translational invariance property of the reconstructed observation sub-signals in the time, frequency, and space domains. Finally, with the 5G New Radio parameter setup, simulation results show that the proposed algorithm achieves better estimation performance, and its root mean square error is closer to the root of the CRBs than that of existing methods.

Submitted 20 December, 2023; originally announced December 2023.
Comments: This manuscript was submitted to an IEEE journal on 9 August 2023
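For orientation, the baseline that super-resolution OFDM sensing methods are measured against is the classical 2D periodogram: divide out the known data symbols, take an IFFT across subcarriers to resolve delay (range) and an FFT across OFDM symbols to resolve Doppler (velocity). The sketch below implements that baseline for a single target with toy parameters; it is not the paper's auto-paired 3DJE algorithm and it omits the azimuth (antenna) dimension.

```python
import numpy as np

# Classical 2D-periodogram baseline for OFDM sensing (not the paper's
# auto-paired super-resolution 3DJE): element-wise division removes the data
# symbols, an IFFT over subcarriers resolves delay, and an FFT over OFDM
# symbols resolves Doppler. All parameters below are toy placeholders.
N_sc, N_sym = 64, 32                  # subcarriers, OFDM symbols
df = 120e3                            # subcarrier spacing (Hz)
T_sym = 1 / df                        # symbol duration (s), cyclic prefix ignored
tau, f_d = 2e-7, 3e3                  # true delay (s) and Doppler shift (Hz)

rng = np.random.default_rng(2)
data = np.exp(1j * np.pi / 2 * rng.integers(0, 4, (N_sc, N_sym)))  # QPSK grid
n = np.arange(N_sc)[:, None]          # subcarrier index
m = np.arange(N_sym)[None, :]         # OFDM symbol index
chan = np.exp(-2j * np.pi * n * df * tau) * np.exp(2j * np.pi * f_d * m * T_sym)
noise = 0.05 * (rng.normal(size=data.shape) + 1j * rng.normal(size=data.shape))
rx = data * chan + noise

F = rx / data                                        # channel per resource element
rd_map = np.fft.fft(np.fft.ifft(F, axis=0), axis=1)  # delay x Doppler map
k, l = np.unravel_index(np.argmax(np.abs(rd_map)), rd_map.shape)
print("delay ~", k / (N_sc * df), "s; Doppler ~", l / (N_sym * T_sym), "Hz")
```

The periodogram's accuracy is limited to the FFT bin width (here 1/(N_sc*df) in delay and 1/(N_sym*T_sym) in Doppler), which is exactly the gap that super-resolution estimators, benchmarked against the CRBs as in this paper, aim to close.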
