Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–42 of 42 results for author: <span class="mathjax">Wei, D</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/eess" aria-role="search"> Searching in archive <strong>eess</strong>. <a href="/search/?searchtype=author&query=Wei%2C+D">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Wei, D"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Wei%2C+D&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Wei, D"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05228">arXiv:2502.05228</a> <span> [<a href="https://arxiv.org/pdf/2502.05228">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Multi-Objective Mobile Damped Wave Algorithm (MOMDWA): A Novel Approach For Quantum System Control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Yu%2C+J">Juntao Yu</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+J">Jiaquan Yu</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Dedai Wei</a>, <a href="/search/eess?searchtype=author&query=Sha%2C+X">Xinye Sha</a>, <a href="/search/eess?searchtype=author&query=Fu%2C+S">Shengwei Fu</a>, <a href="/search/eess?searchtype=author&query=Qiu%2C+M">Miuyu Qiu</a>, <a href="/search/eess?searchtype=author&query=Jin%2C+Y">Yurun Jin</a>, <a href="/search/eess?searchtype=author&query=Ouyang%2C+K">Kaichen Ouyang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05228v1-abstract-short" style="display: inline;"> In this paper, we introduce a novel multi-objective optimization algorithm, the Multi-Objective Mobile Damped Wave Algorithm (MOMDWA), specifically designed to address complex quantum control problems. Our approach extends the capabilities of the original Mobile Damped Wave Algorithm (MDWA) by incorporating multiple objectives, enabling a more comprehensive optimization process. We applied MOMDWA… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05228v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05228v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05228v1-abstract-full" style="display: none;"> In this paper, we introduce a novel multi-objective optimization algorithm, the Multi-Objective Mobile Damped Wave Algorithm (MOMDWA), specifically designed to address complex quantum control problems. Our approach extends the capabilities of the original Mobile Damped Wave Algorithm (MDWA) by incorporating multiple objectives, enabling a more comprehensive optimization process. We applied MOMDWA to three quantum control scenarios, focusing on optimizing the balance between control fidelity, energy consumption, and control smoothness. The results demonstrate that MOMDWA significantly enhances quantum control efficiency and robustness, achieving high fidelity while minimizing energy use and ensuring smooth control pulses. This advancement offers a valuable tool for quantum computing and other domains requiring precise, multi-objective control. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05228v1-abstract-full').style.display = 'none'; document.getElementById('2502.05228v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.19845">arXiv:2411.19845</a> <span> [<a href="https://arxiv.org/pdf/2411.19845">pdf</a>, <a href="https://arxiv.org/format/2411.19845">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> A Visual-inertial Localization Algorithm using Opportunistic Visual Beacons and Dead-Reckoning for GNSS-Denied Large-scale Applications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhang%2C+L">Liqiang Zhang</a>, <a href="/search/eess?searchtype=author&query=Tian%2C+Y">Ye Tian</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Dongyan Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.19845v2-abstract-short" style="display: inline;"> With the development of smart cities, the demand for continuous pedestrian navigation in large-scale urban environments has significantly increased. While global navigation satellite systems (GNSS) provide low-cost and reliable positioning services, they are often hindered in complex urban canyon environments. Thus, exploring opportunistic signals for positioning in urban areas has become a key so… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.19845v2-abstract-full').style.display = 'inline'; document.getElementById('2411.19845v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.19845v2-abstract-full" style="display: none;"> With the development of smart cities, the demand for continuous pedestrian navigation in large-scale urban environments has significantly increased. While global navigation satellite systems (GNSS) provide low-cost and reliable positioning services, they are often hindered in complex urban canyon environments. Thus, exploring opportunistic signals for positioning in urban areas has become a key solution. Augmented reality (AR) allows pedestrians to acquire real-time visual information. Accordingly, we propose a low-cost visual-inertial positioning solution. This method comprises a lightweight multi-scale group convolution (MSGC)-based visual place recognition (VPR) neural network, a pedestrian dead reckoning (PDR) algorithm, and a visual/inertial fusion approach based on a Kalman filter with gross error suppression. The VPR serves as a conditional observation to the Kalman filter, effectively correcting the errors accumulated through the PDR method. This enables the entire algorithm to ensure the reliability of long-term positioning in GNSS-denied areas. Extensive experimental results demonstrate that our method maintains stable positioning during large-scale movements. Compared to the lightweight MobileNetV3-based VPR method, our proposed VPR solution improves Recall@1 by at least 3\% on two public datasets while reducing the number of parameters by 63.37\%. It also achieves performance that is comparable to the VGG16-based method. The VPR-PDR algorithm improves localization accuracy by more than 40\% compared to the original PDR. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.19845v2-abstract-full').style.display = 'none'; document.getElementById('2411.19845v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.08597">arXiv:2409.08597</a> <span> [<a href="https://arxiv.org/pdf/2409.08597">pdf</a>, <a href="https://arxiv.org/format/2409.08597">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> LA-RAG:Enhancing LLM-based ASR Accuracy with Retrieval-Augmented Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+S">Shaojun Li</a>, <a href="/search/eess?searchtype=author&query=Shang%2C+H">Hengchao Shang</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Daimeng Wei</a>, <a href="/search/eess?searchtype=author&query=Guo%2C+J">Jiaxin Guo</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Z">Zongyao Li</a>, <a href="/search/eess?searchtype=author&query=He%2C+X">Xianghui He</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+M">Min Zhang</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+H">Hao Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.08597v1-abstract-short" style="display: inline;"> Recent advancements in integrating speech information into large language models (LLMs) have significantly improved automatic speech recognition (ASR) accuracy. However, existing methods often constrained by the capabilities of the speech encoders under varied acoustic conditions, such as accents. To address this, we propose LA-RAG, a novel Retrieval-Augmented Generation (RAG) paradigm for LLM-bas… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.08597v1-abstract-full').style.display = 'inline'; document.getElementById('2409.08597v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.08597v1-abstract-full" style="display: none;"> Recent advancements in integrating speech information into large language models (LLMs) have significantly improved automatic speech recognition (ASR) accuracy. However, existing methods often constrained by the capabilities of the speech encoders under varied acoustic conditions, such as accents. To address this, we propose LA-RAG, a novel Retrieval-Augmented Generation (RAG) paradigm for LLM-based ASR. LA-RAG leverages fine-grained token-level speech datastores and a speech-to-speech retrieval mechanism to enhance ASR accuracy via LLM in-context learning (ICL) capabilities. Experiments on Mandarin and various Chinese dialect datasets demonstrate significant improvements in ASR accuracy compared to existing methods, validating the effectiveness of our approach, especially in handling accent variations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.08597v1-abstract-full').style.display = 'none'; document.getElementById('2409.08597v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">submitted to ICASSP 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.02005">arXiv:2407.02005</a> <span> [<a href="https://arxiv.org/pdf/2407.02005">pdf</a>, <a href="https://arxiv.org/format/2407.02005">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> An End-to-End Speech Summarization Using Large Language Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Shang%2C+H">Hengchao Shang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Z">Zongyao Li</a>, <a href="/search/eess?searchtype=author&query=Guo%2C+J">Jiaxin Guo</a>, <a href="/search/eess?searchtype=author&query=Li%2C+S">Shaojun Li</a>, <a href="/search/eess?searchtype=author&query=Rao%2C+Z">Zhiqiang Rao</a>, <a href="/search/eess?searchtype=author&query=Luo%2C+Y">Yuanchang Luo</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Daimeng Wei</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+H">Hao Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.02005v1-abstract-short" style="display: inline;"> Abstractive Speech Summarization (SSum) aims to generate human-like text summaries from spoken content. It encounters difficulties in handling long speech input and capturing the intricate cross-modal mapping between long speech inputs and short text summaries. Research on large language models (LLMs) and multimodal information fusion has provided new insights for addressing these challenges. In t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02005v1-abstract-full').style.display = 'inline'; document.getElementById('2407.02005v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.02005v1-abstract-full" style="display: none;"> Abstractive Speech Summarization (SSum) aims to generate human-like text summaries from spoken content. It encounters difficulties in handling long speech input and capturing the intricate cross-modal mapping between long speech inputs and short text summaries. Research on large language models (LLMs) and multimodal information fusion has provided new insights for addressing these challenges. In this paper, we propose an end-to-end SSum model that utilizes Q-Former as a connector for the audio-text modality and employs LLMs to generate text summaries directly from speech features. We adopt a multi-stage training approach that includes LLM based ASR and Text Summarization (TSum) tasks as auxiliary tasks. ASR tasks are used to align feature spaces and enhance the LLM's ability to handle longer speech. Then, we utilize a curriculum learning strategy to facilitate the model's transition from TSum to SSum. Finally, our model achieves competitive performance on the How-2 dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02005v1-abstract-full').style.display = 'none'; document.getElementById('2407.02005v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">InterSpeech 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.09696">arXiv:2406.09696</a> <span> [<a href="https://arxiv.org/pdf/2406.09696">pdf</a>, <a href="https://arxiv.org/format/2406.09696">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MoME: Mixture of Multimodal Experts for Cancer Survival Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xiong%2C+C">Conghao Xiong</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+H">Hao Chen</a>, <a href="/search/eess?searchtype=author&query=Zheng%2C+H">Hao Zheng</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Dong Wei</a>, <a href="/search/eess?searchtype=author&query=Zheng%2C+Y">Yefeng Zheng</a>, <a href="/search/eess?searchtype=author&query=Sung%2C+J+J+Y">Joseph J. Y. Sung</a>, <a href="/search/eess?searchtype=author&query=King%2C+I">Irwin King</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.09696v1-abstract-short" style="display: inline;"> Survival analysis, as a challenging task, requires integrating Whole Slide Images (WSIs) and genomic data for comprehensive decision-making. There are two main challenges in this task: significant heterogeneity and complex inter- and intra-modal interactions between the two modalities. Previous approaches utilize co-attention methods, which fuse features from both modalities only once after separa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.09696v1-abstract-full').style.display = 'inline'; document.getElementById('2406.09696v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.09696v1-abstract-full" style="display: none;"> Survival analysis, as a challenging task, requires integrating Whole Slide Images (WSIs) and genomic data for comprehensive decision-making. There are two main challenges in this task: significant heterogeneity and complex inter- and intra-modal interactions between the two modalities. Previous approaches utilize co-attention methods, which fuse features from both modalities only once after separate encoding. However, these approaches are insufficient for modeling the complex task due to the heterogeneous nature between the modalities. To address these issues, we propose a Biased Progressive Encoding (BPE) paradigm, performing encoding and fusion simultaneously. This paradigm uses one modality as a reference when encoding the other. It enables deep fusion of the modalities through multiple alternating iterations, progressively reducing the cross-modal disparities and facilitating complementary interactions. Besides modality heterogeneity, survival analysis involves various biomarkers from WSIs, genomics, and their combinations. The critical biomarkers may exist in different modalities under individual variations, necessitating flexible adaptation of the models to specific scenarios. Therefore, we further propose a Mixture of Multimodal Experts (MoME) layer to dynamically selects tailored experts in each stage of the BPE paradigm. Experts incorporate reference information from another modality to varying degrees, enabling a balanced or biased focus on different modalities during the encoding process. Extensive experimental results demonstrate the superior performance of our method on various datasets, including TCGA-BLCA, TCGA-UCEC and TCGA-LUAD. Codes are available at https://github.com/BearCleverProud/MoME. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.09696v1-abstract-full').style.display = 'none'; document.getElementById('2406.09696v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 + 1/2 pages, early accepted to MICCAI2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.04791">arXiv:2406.04791</a> <span> [<a href="https://arxiv.org/pdf/2406.04791">pdf</a>, <a href="https://arxiv.org/format/2406.04791">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Speaker-Smoothed kNN Speaker Adaptation for End-to-End ASR </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+S">Shaojun Li</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Daimeng Wei</a>, <a href="/search/eess?searchtype=author&query=Shang%2C+H">Hengchao Shang</a>, <a href="/search/eess?searchtype=author&query=Guo%2C+J">Jiaxin Guo</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Z">ZongYao Li</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Z">Zhanglin Wu</a>, <a href="/search/eess?searchtype=author&query=Rao%2C+Z">Zhiqiang Rao</a>, <a href="/search/eess?searchtype=author&query=Luo%2C+Y">Yuanchang Luo</a>, <a href="/search/eess?searchtype=author&query=He%2C+X">Xianghui He</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+H">Hao Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.04791v3-abstract-short" style="display: inline;"> Despite recent improvements in End-to-End Automatic Speech Recognition (E2E ASR) systems, the performance can degrade due to vocal characteristic mismatches between training and testing data, particularly with limited target speaker adaptation data. We propose a novel speaker adaptation approach Speaker-Smoothed kNN that leverages k-Nearest Neighbors (kNN) retrieval techniques to improve model out… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.04791v3-abstract-full').style.display = 'inline'; document.getElementById('2406.04791v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.04791v3-abstract-full" style="display: none;"> Despite recent improvements in End-to-End Automatic Speech Recognition (E2E ASR) systems, the performance can degrade due to vocal characteristic mismatches between training and testing data, particularly with limited target speaker adaptation data. We propose a novel speaker adaptation approach Speaker-Smoothed kNN that leverages k-Nearest Neighbors (kNN) retrieval techniques to improve model output by finding correctly pronounced tokens from its pre-built datastore during the decoding phase. Moreover, we utilize x-vector to dynamically adjust kNN interpolation parameters for data sparsity issue. This approach was validated using KeSpeech and MagicData corpora under in-domain and all-domain settings. Our method consistently performs comparably to fine-tuning without the associated performance degradation during speaker changes. Furthermore, in the all-domain setting, our method achieves state-of-the-art results, reducing the CER in both single speaker and multi-speaker test scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.04791v3-abstract-full').style.display = 'none'; document.getElementById('2406.04791v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to Interspeech 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.16197">arXiv:2405.16197</a> <span> [<a href="https://arxiv.org/pdf/2405.16197">pdf</a>, <a href="https://arxiv.org/format/2405.16197">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> A 7K Parameter Model for Underwater Image Enhancement based on Transmission Map Prior </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhou%2C+F">Fuheng Zhou</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Dikai Wei</a>, <a href="/search/eess?searchtype=author&query=Fan%2C+Y">Ye Fan</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+Y">Yulong Huang</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Y">Yonggang Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.16197v1-abstract-short" style="display: inline;"> Although deep learning based models for underwater image enhancement have achieved good performance, they face limitations in both lightweight and effectiveness, which prevents their deployment and application on resource-constrained platforms. Moreover, most existing deep learning based models use data compression to get high-level semantic information in latent space instead of using the origina… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.16197v1-abstract-full').style.display = 'inline'; document.getElementById('2405.16197v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.16197v1-abstract-full" style="display: none;"> Although deep learning based models for underwater image enhancement have achieved good performance, they face limitations in both lightweight and effectiveness, which prevents their deployment and application on resource-constrained platforms. Moreover, most existing deep learning based models use data compression to get high-level semantic information in latent space instead of using the original information. Therefore, they require decoder blocks to generate the details of the output. This requires additional computational cost. In this paper, a lightweight network named lightweight selective attention network (LSNet) based on the top-k selective attention and transmission maps mechanism is proposed. The proposed model achieves a PSNR of 97\% with only 7K parameters compared to a similar attention-based model. Extensive experiments show that the proposed LSNet achieves excellent performance in state-of-the-art models with significantly fewer parameters and computational resources. The code is available at https://github.com/FuhengZhou/LSNet}{https://github.com/FuhengZhou/LSNet. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.16197v1-abstract-full').style.display = 'none'; document.getElementById('2405.16197v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.14435">arXiv:2404.14435</a> <span> [<a href="https://arxiv.org/pdf/2404.14435">pdf</a>, <a href="https://arxiv.org/format/2404.14435">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Frenet-Serret Frame-based Decomposition for Part Segmentation of 3D Curvilinear Structures </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Gu%2C+L">Leslie Gu</a>, <a href="/search/eess?searchtype=author&query=Adhinarta%2C+J+K">Jason Ken Adhinarta</a>, <a href="/search/eess?searchtype=author&query=Bessmeltsev%2C+M">Mikhail Bessmeltsev</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+J">Jiancheng Yang</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Y+J">Yongjie Jessica Zhang</a>, <a href="/search/eess?searchtype=author&query=Yin%2C+W">Wenjie Yin</a>, <a href="/search/eess?searchtype=author&query=Berger%2C+D">Daniel Berger</a>, <a href="/search/eess?searchtype=author&query=Lichtman%2C+J">Jeff Lichtman</a>, <a href="/search/eess?searchtype=author&query=Pfister%2C+H">Hanspeter Pfister</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Donglai Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.14435v2-abstract-short" style="display: inline;"> Accurately segmenting 3D curvilinear structures in medical imaging remains challenging due to their complex geometry and the scarcity of diverse, large-scale datasets for algorithm development and evaluation. In this paper, we use dendritic spine segmentation as a case study and address these challenges by introducing a novel Frenet--Serret Frame-based Decomposition, which decomposes 3D curvilinea… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.14435v2-abstract-full').style.display = 'inline'; document.getElementById('2404.14435v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.14435v2-abstract-full" style="display: none;"> Accurately segmenting 3D curvilinear structures in medical imaging remains challenging due to their complex geometry and the scarcity of diverse, large-scale datasets for algorithm development and evaluation. In this paper, we use dendritic spine segmentation as a case study and address these challenges by introducing a novel Frenet--Serret Frame-based Decomposition, which decomposes 3D curvilinear structures into a globally $ C^2 $ continuous curve that captures the overall shape, and a cylindrical primitive that encodes local geometric properties. This approach leverages Frenet--Serret Frames and arc length parameterization to preserve essential geometric features while reducing representational complexity, facilitating data-efficient learning, improved segmentation accuracy, and generalization on 3D curvilinear structures. To rigorously evaluate our method, we introduce two datasets: CurviSeg, a synthetic dataset for 3D curvilinear structure segmentation that validates our method's key properties, and DenSpineEM, a benchmark for dendritic spine segmentation, which comprises 4,476 manually annotated spines from 70 dendrites across three public electron microscopy datasets, covering multiple brain regions and species. Our experiments on DenSpineEM demonstrate exceptional cross-region and cross-species generalization: models trained on the mouse somatosensory cortex subset achieve 91.9\% Dice, maintaining strong performance in zero-shot segmentation on both mouse visual cortex (94.1\% Dice) and human frontal lobe (81.8\% Dice) subsets. Moreover, we test the generalizability of our method on the IntrA dataset, where it achieves 77.08\% Dice (5.29\% higher than prior arts) on intracranial aneurysm segmentation. These findings demonstrate the potential of our approach for accurately analyzing complex curvilinear structures across diverse medical imaging fields. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.14435v2-abstract-full').style.display = 'none'; document.getElementById('2404.14435v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.06080">arXiv:2404.06080</a> <span> [<a href="https://arxiv.org/pdf/2404.06080">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Using Few-Shot Learning to Classify Primary Lung Cancer and Other Malignancy with Lung Metastasis in Cytological Imaging via Endobronchial Ultrasound Procedures </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Lin%2C+C">Ching-Kai Lin</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Di-Chun Wei</a>, <a href="/search/eess?searchtype=author&query=Cheng%2C+Y">Yun-Chien Cheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.06080v2-abstract-short" style="display: inline;"> This study aims to establish a computer-aided diagnosis system for endobronchial ultrasound (EBUS) surgery to assist physicians in the preliminary diagnosis of metastatic cancer. This involves arranging immediate examinations for other sites of metastatic cancer after EBUS surgery, eliminating the need to wait for reports, thereby shortening the waiting time by more than half and enabling patients… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.06080v2-abstract-full').style.display = 'inline'; document.getElementById('2404.06080v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.06080v2-abstract-full" style="display: none;"> This study aims to establish a computer-aided diagnosis system for endobronchial ultrasound (EBUS) surgery to assist physicians in the preliminary diagnosis of metastatic cancer. This involves arranging immediate examinations for other sites of metastatic cancer after EBUS surgery, eliminating the need to wait for reports, thereby shortening the waiting time by more than half and enabling patients to detect other cancers earlier, allowing for early planning and implementation of treatment plans. Unlike previous studies on cell image classification, which have abundant datasets for training, this study must also be able to make effective classifications despite the limited amount of case data for lung metastatic cancer. In the realm of small data set classification methods, Few-shot learning (FSL) has become mainstream in recent years. Through its ability to train on small datasets and its strong generalization capabilities, FSL shows potential in this task of lung metastatic cell image classification. This study will adopt the approach of Few-shot learning, referencing existing proposed models, and designing a model architecture for classifying lung metastases cell images. Batch Spectral Regularization (BSR) will be incorporated as a loss update parameter, and the Finetune method of PMF will be modified. In terms of test results, the addition of BSR and the modified Finetune method further increases the accuracy by 8.89% to 65.60%, outperforming other FSL methods. This study confirms that FSL is superior to supervised and transfer learning in classifying metastatic cancer and demonstrates that using BSR as a loss function and modifying Finetune can enhance the model's capabilities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.06080v2-abstract-full').style.display = 'none'; document.getElementById('2404.06080v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.04904">arXiv:2404.04904</a> <span> [<a href="https://arxiv.org/pdf/2404.04904">pdf</a>, <a href="https://arxiv.org/format/2404.04904">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Cross-Domain Audio Deepfake Detection: Dataset and Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yuang Li</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+M">Min Zhang</a>, <a href="/search/eess?searchtype=author&query=Ren%2C+M">Mengxin Ren</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+M">Miaomiao Ma</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Daimeng Wei</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+H">Hao Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.04904v2-abstract-short" style="display: inline;"> Audio deepfake detection (ADD) is essential for preventing the misuse of synthetic voices that may infringe on personal rights and privacy. Recent zero-shot text-to-speech (TTS) models pose higher risks as they can clone voices with a single utterance. However, the existing ADD datasets are outdated, leading to suboptimal generalization of detection models. In this paper, we construct a new cross-… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.04904v2-abstract-full').style.display = 'inline'; document.getElementById('2404.04904v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.04904v2-abstract-full" style="display: none;"> Audio deepfake detection (ADD) is essential for preventing the misuse of synthetic voices that may infringe on personal rights and privacy. Recent zero-shot text-to-speech (TTS) models pose higher risks as they can clone voices with a single utterance. However, the existing ADD datasets are outdated, leading to suboptimal generalization of detection models. In this paper, we construct a new cross-domain ADD dataset comprising over 300 hours of speech data that is generated by five advanced zero-shot TTS models. To simulate real-world scenarios, we employ diverse attack methods and audio prompts from different datasets. Experiments show that, through novel attack-augmented training, the Wav2Vec2-large and Whisper-medium models achieve equal error rates of 4.1\% and 6.5\% respectively. Additionally, we demonstrate our models' outstanding few-shot ADD ability by fine-tuning with just one minute of target-domain data. Nonetheless, neural codec compressors greatly affect the detection accuracy, necessitating further research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.04904v2-abstract-full').style.display = 'none'; document.getElementById('2404.04904v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.09372">arXiv:2402.09372</a> <span> [<a href="https://arxiv.org/pdf/2402.09372">pdf</a>, <a href="https://arxiv.org/format/2402.09372">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Deep Rib Fracture Instance Segmentation and Classification from CT on the RibFrac Challenge </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Yang%2C+J">Jiancheng Yang</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+R">Rui Shi</a>, <a href="/search/eess?searchtype=author&query=Jin%2C+L">Liang Jin</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+X">Xiaoyang Huang</a>, <a href="/search/eess?searchtype=author&query=Kuang%2C+K">Kaiming Kuang</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Donglai Wei</a>, <a href="/search/eess?searchtype=author&query=Gu%2C+S">Shixuan Gu</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+J">Jianying Liu</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+P">Pengfei Liu</a>, <a href="/search/eess?searchtype=author&query=Chai%2C+Z">Zhizhong Chai</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+Y">Yongjie Xiao</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+H">Hao Chen</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+L">Liming Xu</a>, <a href="/search/eess?searchtype=author&query=Du%2C+B">Bang Du</a>, <a href="/search/eess?searchtype=author&query=Yan%2C+X">Xiangyi Yan</a>, <a href="/search/eess?searchtype=author&query=Tang%2C+H">Hao Tang</a>, <a href="/search/eess?searchtype=author&query=Alessio%2C+A">Adam Alessio</a>, <a href="/search/eess?searchtype=author&query=Holste%2C+G">Gregory Holste</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+J">Jiapeng Zhang</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+X">Xiaoming Wang</a>, <a href="/search/eess?searchtype=author&query=He%2C+J">Jianye He</a>, <a href="/search/eess?searchtype=author&query=Che%2C+L">Lixuan Che</a>, <a href="/search/eess?searchtype=author&query=Pfister%2C+H">Hanspeter Pfister</a>, <a href="/search/eess?searchtype=author&query=Li%2C+M">Ming Li</a>, <a href="/search/eess?searchtype=author&query=Ni%2C+B">Bingbing Ni</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.09372v1-abstract-short" style="display: inline;"> Rib fractures are a common and potentially severe injury that can be challenging and labor-intensive to detect in CT scans. While there have been efforts to address this field, the lack of large-scale annotated datasets and evaluation benchmarks has hindered the development and validation of deep learning algorithms. To address this issue, the RibFrac Challenge was introduced, providing a benchmar… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09372v1-abstract-full').style.display = 'inline'; document.getElementById('2402.09372v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.09372v1-abstract-full" style="display: none;"> Rib fractures are a common and potentially severe injury that can be challenging and labor-intensive to detect in CT scans. While there have been efforts to address this field, the lack of large-scale annotated datasets and evaluation benchmarks has hindered the development and validation of deep learning algorithms. To address this issue, the RibFrac Challenge was introduced, providing a benchmark dataset of over 5,000 rib fractures from 660 CT scans, with voxel-level instance mask annotations and diagnosis labels for four clinical categories (buckle, nondisplaced, displaced, or segmental). The challenge includes two tracks: a detection (instance segmentation) track evaluated by an FROC-style metric and a classification track evaluated by an F1-style metric. During the MICCAI 2020 challenge period, 243 results were evaluated, and seven teams were invited to participate in the challenge summary. The analysis revealed that several top rib fracture detection solutions achieved performance comparable or even better than human experts. Nevertheless, the current rib fracture classification solutions are hardly clinically applicable, which can be an interesting area in the future. As an active benchmark and research resource, the data and online evaluation of the RibFrac Challenge are available at the challenge website. As an independent contribution, we have also extended our previous internal baseline by incorporating recent advancements in large-scale pretrained networks and point-based rib segmentation techniques. The resulting FracNet+ demonstrates competitive performance in rib fracture detection, which lays a foundation for further research and development in AI-assisted rib fracture detection and diagnosis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09372v1-abstract-full').style.display = 'none'; document.getElementById('2402.09372v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Challenge paper for MICCAI RibFrac Challenge (https://ribfrac.grand-challenge.org/)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.05689">arXiv:2401.05689</a> <span> [<a href="https://arxiv.org/pdf/2401.05689">pdf</a>, <a href="https://arxiv.org/format/2401.05689">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/ICASSP49357.2023.10096194">10.1109/ICASSP49357.2023.10096194 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> UCorrect: An Unsupervised Framework for Automatic Speech Recognition Error Correction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Guo%2C+J">Jiaxin Guo</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+M">Minghan Wang</a>, <a href="/search/eess?searchtype=author&query=Qiao%2C+X">Xiaosong Qiao</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Daimeng Wei</a>, <a href="/search/eess?searchtype=author&query=Shang%2C+H">Hengchao Shang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Z">Zongyao Li</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+Z">Zhengzhe Yu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yinglu Li</a>, <a href="/search/eess?searchtype=author&query=Su%2C+C">Chang Su</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+M">Min Zhang</a>, <a href="/search/eess?searchtype=author&query=Tao%2C+S">Shimin Tao</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+H">Hao Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.05689v1-abstract-short" style="display: inline;"> Error correction techniques have been used to refine the output sentences from automatic speech recognition (ASR) models and achieve a lower word error rate (WER). Previous works usually adopt end-to-end models and has strong dependency on Pseudo Paired Data and Original Paired Data. But when only pre-training on Pseudo Paired Data, previous models have negative effect on correction. While fine-tu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.05689v1-abstract-full').style.display = 'inline'; document.getElementById('2401.05689v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.05689v1-abstract-full" style="display: none;"> Error correction techniques have been used to refine the output sentences from automatic speech recognition (ASR) models and achieve a lower word error rate (WER). Previous works usually adopt end-to-end models and has strong dependency on Pseudo Paired Data and Original Paired Data. But when only pre-training on Pseudo Paired Data, previous models have negative effect on correction. While fine-tuning on Original Paired Data, the source side data must be transcribed by a well-trained ASR model, which takes a lot of time and not universal. In this paper, we propose UCorrect, an unsupervised Detector-Generator-Selector framework for ASR Error Correction. UCorrect has no dependency on the training data mentioned before. The whole procedure is first to detect whether the character is erroneous, then to generate some candidate characters and finally to select the most confident one to replace the error character. Experiments on the public AISHELL-1 dataset and WenetSpeech dataset show the effectiveness of UCorrect for ASR error correction: 1) it achieves significant WER reduction, achieves 6.83\% even without fine-tuning and 14.29\% after fine-tuning; 2) it outperforms the popular NAR correction models by a large margin with a competitive low latency; and 3) it is an universal method, as it reduces all WERs of the ASR model with different decoding strategies and reduces all WERs of ASR models trained on different scale datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.05689v1-abstract-full').style.display = 'none'; document.getElementById('2401.05689v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in ICASSP 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.01726">arXiv:2312.01726</a> <span> [<a href="https://arxiv.org/pdf/2312.01726">pdf</a>, <a href="https://arxiv.org/format/2312.01726">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Simultaneous Alignment and Surface Regression Using Hybrid 2D-3D Networks for 3D Coherent Layer Segmentation of Retinal OCT Images with Full and Sparse Annotations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Liu%2C+H">Hong Liu</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Dong Wei</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+D">Donghuan Lu</a>, <a href="/search/eess?searchtype=author&query=Tang%2C+X">Xiaoying Tang</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+L">Liansheng Wang</a>, <a href="/search/eess?searchtype=author&query=Zheng%2C+Y">Yefeng Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.01726v1-abstract-short" style="display: inline;"> Layer segmentation is important to quantitative analysis of retinal optical coherence tomography (OCT). Recently, deep learning based methods have been developed to automate this task and yield remarkable performance. However, due to the large spatial gap and potential mismatch between the B-scans of an OCT volume, all of them were based on 2D segmentation of individual B-scans, which may lose the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.01726v1-abstract-full').style.display = 'inline'; document.getElementById('2312.01726v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.01726v1-abstract-full" style="display: none;"> Layer segmentation is important to quantitative analysis of retinal optical coherence tomography (OCT). Recently, deep learning based methods have been developed to automate this task and yield remarkable performance. However, due to the large spatial gap and potential mismatch between the B-scans of an OCT volume, all of them were based on 2D segmentation of individual B-scans, which may lose the continuity and diagnostic information of the retinal layers in 3D space. Besides, most of these methods required dense annotation of the OCT volumes, which is labor-intensive and expertise-demanding. This work presents a novel framework based on hybrid 2D-3D convolutional neural networks (CNNs) to obtain continuous 3D retinal layer surfaces from OCT volumes, which works well with both full and sparse annotations. The 2D features of individual B-scans are extracted by an encoder consisting of 2D convolutions. These 2D features are then used to produce the alignment displacement vectors and layer segmentation by two 3D decoders coupled via a spatial transformer module. Two losses are proposed to utilize the retinal layers' natural property of being smooth for B-scan alignment and layer segmentation, respectively, and are the key to the semi-supervised learning with sparse annotation. The entire framework is trained end-to-end. To the best of our knowledge, this is the first work that attempts 3D retinal layer segmentation in volumetric OCT images based on CNNs. Experiments on a synthetic dataset and three public clinical datasets show that our framework can effectively align the B-scans for potential motion correction, and achieves superior performance to state-of-the-art 2D deep learning methods in terms of both layer segmentation accuracy and cross-B-scan 3D continuity in both fully and semi-supervised settings, thus offering more clinical values than previous works. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.01726v1-abstract-full').style.display = 'none'; document.getElementById('2312.01726v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by MIA. arXiv admin note: text overlap with arXiv:2203.02390</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.17329">arXiv:2309.17329</a> <span> [<a href="https://arxiv.org/pdf/2309.17329">pdf</a>, <a href="https://arxiv.org/format/2309.17329">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.media.2024.103367">10.1016/j.media.2024.103367 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Efficient Anatomical Labeling of Pulmonary Tree Structures via Deep Point-Graph Representation-based Implicit Fields </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xie%2C+K">Kangxian Xie</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+J">Jiancheng Yang</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Donglai Wei</a>, <a href="/search/eess?searchtype=author&query=Weng%2C+Z">Ziqiao Weng</a>, <a href="/search/eess?searchtype=author&query=Fua%2C+P">Pascal Fua</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.17329v3-abstract-short" style="display: inline;"> Pulmonary diseases rank prominently among the principal causes of death worldwide. Curing them will require, among other things, a better understanding of the complex 3D tree-shaped structures within the pulmonary system, such as airways, arteries, and veins. Traditional approaches using high-resolution image stacks and standard CNNs on dense voxel grids face challenges in computational efficiency… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.17329v3-abstract-full').style.display = 'inline'; document.getElementById('2309.17329v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.17329v3-abstract-full" style="display: none;"> Pulmonary diseases rank prominently among the principal causes of death worldwide. Curing them will require, among other things, a better understanding of the complex 3D tree-shaped structures within the pulmonary system, such as airways, arteries, and veins. Traditional approaches using high-resolution image stacks and standard CNNs on dense voxel grids face challenges in computational efficiency, limited resolution, local context, and inadequate preservation of shape topology. Our method addresses these issues by shifting from dense voxel to sparse point representation, offering better memory efficiency and global context utilization. However, the inherent sparsity in point representation can lead to a loss of crucial connectivity in tree-shaped structures. To mitigate this, we introduce graph learning on skeletonized structures, incorporating differentiable feature fusion for improved topology and long-distance context capture. Furthermore, we employ an implicit function for efficient conversion of sparse representations into dense reconstructions end-to-end. The proposed method not only delivers state-of-the-art performance in labeling accuracy, both overall and at key locations, but also enables efficient inference and the generation of closed surface shapes. Addressing data scarcity in this field, we have also curated a comprehensive dataset to validate our approach. Data and code are available at \url{https://github.com/M3DV/pulmonary-tree-labeling}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.17329v3-abstract-full').style.display = 'none'; document.getElementById('2309.17329v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by Medical Image Analysis</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68T45; 62P10; 68U10; 68U05; 05C90 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.12805">arXiv:2309.12805</a> <span> [<a href="https://arxiv.org/pdf/2309.12805">pdf</a>, <a href="https://arxiv.org/format/2309.12805">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1002/mp.16692">10.1002/mp.16692 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Automatic view plane prescription for cardiac magnetic resonance imaging via supervision by spatial relationship between views </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wei%2C+D">Dong Wei</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+Y">Yawen Huang</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+D">Donghuan Lu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yuexiang Li</a>, <a href="/search/eess?searchtype=author&query=Zheng%2C+Y">Yefeng Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.12805v1-abstract-short" style="display: inline;"> Background: View planning for the acquisition of cardiac magnetic resonance (CMR) imaging remains a demanding task in clinical practice. Purpose: Existing approaches to its automation relied either on an additional volumetric image not typically acquired in clinic routine, or on laborious manual annotations of cardiac structural landmarks. This work presents a clinic-compatible, annotation-free sy… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.12805v1-abstract-full').style.display = 'inline'; document.getElementById('2309.12805v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.12805v1-abstract-full" style="display: none;"> Background: View planning for the acquisition of cardiac magnetic resonance (CMR) imaging remains a demanding task in clinical practice. Purpose: Existing approaches to its automation relied either on an additional volumetric image not typically acquired in clinic routine, or on laborious manual annotations of cardiac structural landmarks. This work presents a clinic-compatible, annotation-free system for automatic CMR view planning. Methods: The system mines the spatial relationship, more specifically, locates the intersecting lines, between the target planes and source views, and trains deep networks to regress heatmaps defined by distances from the intersecting lines. The intersection lines are the prescription lines prescribed by the technologists at the time of image acquisition using cardiac landmarks, and retrospectively identified from the spatial relationship. As the spatial relationship is self-contained in properly stored data, the need for additional manual annotation is eliminated. In addition, the interplay of multiple target planes predicted in a source view is utilized in a stacked hourglass architecture to gradually improve the regression. Then, a multi-view planning strategy is proposed to aggregate information from the predicted heatmaps for all the source views of a target plane, for a globally optimal prescription, mimicking the similar strategy practiced by skilled human prescribers. Results: The experiments include 181 CMR exams. Our system yields the mean angular difference and point-to-plane distance of 5.68 degrees and 3.12 mm, respectively. It not only achieves superior accuracy to existing approaches including conventional atlas-based and newer deep-learning-based in prescribing the four standard CMR planes but also demonstrates prescription of the first cardiac-anatomy-oriented plane(s) from the body-oriented scout. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.12805v1-abstract-full').style.display = 'none'; document.getElementById('2309.12805v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Medical Physics. arXiv admin note: text overlap with arXiv:2109.11715</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.14274">arXiv:2306.14274</a> <span> [<a href="https://arxiv.org/pdf/2306.14274">pdf</a>, <a href="https://arxiv.org/format/2306.14274">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MEPNet: A Model-Driven Equivariant Proximal Network for Joint Sparse-View Reconstruction and Metal Artifact Reduction in CT Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+H">Hong Wang</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+M">Minghao Zhou</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Dong Wei</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yuexiang Li</a>, <a href="/search/eess?searchtype=author&query=Zheng%2C+Y">Yefeng Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.14274v1-abstract-short" style="display: inline;"> Sparse-view computed tomography (CT) has been adopted as an important technique for speeding up data acquisition and decreasing radiation dose. However, due to the lack of sufficient projection data, the reconstructed CT images often present severe artifacts, which will be further amplified when patients carry metallic implants. For this joint sparse-view reconstruction and metal artifact reductio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.14274v1-abstract-full').style.display = 'inline'; document.getElementById('2306.14274v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.14274v1-abstract-full" style="display: none;"> Sparse-view computed tomography (CT) has been adopted as an important technique for speeding up data acquisition and decreasing radiation dose. However, due to the lack of sufficient projection data, the reconstructed CT images often present severe artifacts, which will be further amplified when patients carry metallic implants. For this joint sparse-view reconstruction and metal artifact reduction task, most of the existing methods are generally confronted with two main limitations: 1) They are almost built based on common network modules without fully embedding the physical imaging geometry constraint of this specific task into the dual-domain learning; 2) Some important prior knowledge is not deeply explored and sufficiently utilized. Against these issues, we specifically construct a dual-domain reconstruction model and propose a model-driven equivariant proximal network, called MEPNet. The main characteristics of MEPNet are: 1) It is optimization-inspired and has a clear working mechanism; 2) The involved proximal operator is modeled via a rotation equivariant convolutional neural network, which finely represents the inherent rotational prior underlying the CT scanning that the same organ can be imaged at different angles. Extensive experiments conducted on several datasets comprehensively substantiate that compared with the conventional convolution-based proximal network, such a rotation equivariance mechanism enables our proposed method to achieve better reconstruction performance with fewer network parameters. We will release the code at \url{https://github.com/hongwang01/MEPNet}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.14274v1-abstract-full').style.display = 'none'; document.getElementById('2306.14274v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">MICCAI 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.06767">arXiv:2306.06767</a> <span> [<a href="https://arxiv.org/pdf/2306.06767">pdf</a>, <a href="https://arxiv.org/format/2306.06767">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.metrad.2023.100007">10.1016/j.metrad.2023.100007 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> The Impact of ChatGPT and LLMs on Medical Imaging Stakeholders: Perspectives and Use Cases </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Yang%2C+J">Jiancheng Yang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+H+B">Hongwei Bran Li</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Donglai Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.06767v2-abstract-short" style="display: inline;"> This study investigates the transformative potential of Large Language Models (LLMs), such as OpenAI ChatGPT, in medical imaging. With the aid of public data, these models, which possess remarkable language understanding and generation capabilities, are augmenting the interpretive skills of radiologists, enhancing patient-physician communication, and streamlining clinical workflows. The paper intr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.06767v2-abstract-full').style.display = 'inline'; document.getElementById('2306.06767v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.06767v2-abstract-full" style="display: none;"> This study investigates the transformative potential of Large Language Models (LLMs), such as OpenAI ChatGPT, in medical imaging. With the aid of public data, these models, which possess remarkable language understanding and generation capabilities, are augmenting the interpretive skills of radiologists, enhancing patient-physician communication, and streamlining clinical workflows. The paper introduces an analytic framework for presenting the complex interactions between LLMs and the broader ecosystem of medical imaging stakeholders, including businesses, insurance entities, governments, research institutions, and hospitals (nicknamed BIGR-H). Through detailed analyses, illustrative use cases, and discussions on the broader implications and future directions, this perspective seeks to raise discussion in strategic planning and decision-making in the era of AI-enabled healthcare. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.06767v2-abstract-full').style.display = 'none'; document.getElementById('2306.06767v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Paper invited for the first issue of Meta-Radiology</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.10009">arXiv:2305.10009</a> <span> [<a href="https://arxiv.org/pdf/2305.10009">pdf</a>, <a href="https://arxiv.org/format/2305.10009">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> A Modular and High-Resolution Time-Frequency Post-Processing Technique </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Shen%2C+J">Jinshun Shen</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Deyun Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.10009v1-abstract-short" style="display: inline;"> In this letter, based on the variational model, we propose a novel time-frequency post-processing technique to approximate the ideal time-frequency representation. Our method has the advantage of modularity, enabling "plug and play", independent of the performance of specific time-frequency analysis tool. Therefore, it can be easily generalized to the fractional Fourier domain and the linear canon… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.10009v1-abstract-full').style.display = 'inline'; document.getElementById('2305.10009v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.10009v1-abstract-full" style="display: none;"> In this letter, based on the variational model, we propose a novel time-frequency post-processing technique to approximate the ideal time-frequency representation. Our method has the advantage of modularity, enabling "plug and play", independent of the performance of specific time-frequency analysis tool. Therefore, it can be easily generalized to the fractional Fourier domain and the linear canonical domain. Additionally, high-resolution is its merit, which depends on the specific instantaneous frequency estimation method. We reveal the relationship between instantaneous frequency estimation and reassignment method. The effectiveness of the proposed method is verified on both synthetic signals and real world signal. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.10009v1-abstract-full').style.display = 'none'; document.getElementById('2305.10009v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.05302">arXiv:2303.05302</a> <span> [<a href="https://arxiv.org/pdf/2303.05302">pdf</a>, <a href="https://arxiv.org/format/2303.05302">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> M3AE: Multimodal Representation Learning for Brain Tumor Segmentation with Missing Modalities </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Liu%2C+H">Hong Liu</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Dong Wei</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+D">Donghuan Lu</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+J">Jinghan Sun</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+L">Liansheng Wang</a>, <a href="/search/eess?searchtype=author&query=Zheng%2C+Y">Yefeng Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.05302v1-abstract-short" style="display: inline;"> Multimodal magnetic resonance imaging (MRI) provides complementary information for sub-region analysis of brain tumors. Plenty of methods have been proposed for automatic brain tumor segmentation using four common MRI modalities and achieved remarkable performance. In practice, however, it is common to have one or more modalities missing due to image corruption, artifacts, acquisition protocols, a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.05302v1-abstract-full').style.display = 'inline'; document.getElementById('2303.05302v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.05302v1-abstract-full" style="display: none;"> Multimodal magnetic resonance imaging (MRI) provides complementary information for sub-region analysis of brain tumors. Plenty of methods have been proposed for automatic brain tumor segmentation using four common MRI modalities and achieved remarkable performance. In practice, however, it is common to have one or more modalities missing due to image corruption, artifacts, acquisition protocols, allergy to contrast agents, or simply cost. In this work, we propose a novel two-stage framework for brain tumor segmentation with missing modalities. In the first stage, a multimodal masked autoencoder (M3AE) is proposed, where both random modalities (i.e., modality dropout) and random patches of the remaining modalities are masked for a reconstruction task, for self-supervised learning of robust multimodal representations against missing modalities. To this end, we name our framework M3AE. Meanwhile, we employ model inversion to optimize a representative full-modal image at marginal extra cost, which will be used to substitute for the missing modalities and boost performance during inference. Then in the second stage, a memory-efficient self distillation is proposed to distill knowledge between heterogenous missing-modal situations while fine-tuning the model for supervised segmentation. Our M3AE belongs to the 'catch-all' genre where a single model can be applied to all possible subsets of modalities, thus is economic for both training and deployment. Extensive experiments on BraTS 2018 and 2020 datasets demonstrate its superior performance to existing state-of-the-art methods with missing modalities, as well as the efficacy of its components. Our code is available at: https://github.com/ccarliu/m3ae. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.05302v1-abstract-full').style.display = 'none'; document.getElementById('2303.05302v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> AAAI 2023 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.10431">arXiv:2212.10431</a> <span> [<a href="https://arxiv.org/pdf/2212.10431">pdf</a>, <a href="https://arxiv.org/format/2212.10431">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> QuantArt: Quantizing Image Style Transfer Towards High Visual Fidelity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Huang%2C+S">Siyu Huang</a>, <a href="/search/eess?searchtype=author&query=An%2C+J">Jie An</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Donglai Wei</a>, <a href="/search/eess?searchtype=author&query=Luo%2C+J">Jiebo Luo</a>, <a href="/search/eess?searchtype=author&query=Pfister%2C+H">Hanspeter Pfister</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.10431v2-abstract-short" style="display: inline;"> The mechanism of existing style transfer algorithms is by minimizing a hybrid loss function to push the generated image toward high similarities in both content and style. However, this type of approach cannot guarantee visual fidelity, i.e., the generated artworks should be indistinguishable from real ones. In this paper, we devise a new style transfer framework called QuantArt for high visual-fi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.10431v2-abstract-full').style.display = 'inline'; document.getElementById('2212.10431v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.10431v2-abstract-full" style="display: none;"> The mechanism of existing style transfer algorithms is by minimizing a hybrid loss function to push the generated image toward high similarities in both content and style. However, this type of approach cannot guarantee visual fidelity, i.e., the generated artworks should be indistinguishable from real ones. In this paper, we devise a new style transfer framework called QuantArt for high visual-fidelity stylization. QuantArt pushes the latent representation of the generated artwork toward the centroids of the real artwork distribution with vector quantization. By fusing the quantized and continuous latent representations, QuantArt allows flexible control over the generated artworks in terms of content preservation, style similarity, and visual fidelity. Experiments on various style transfer settings show that our QuantArt framework achieves significantly higher visual fidelity compared with the existing style transfer methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.10431v2-abstract-full').style.display = 'none'; document.getElementById('2212.10431v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to CVPR 2023. Code is available at https://github.com/siyuhuang/QuantArt</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.09309">arXiv:2210.09309</a> <span> [<a href="https://arxiv.org/pdf/2210.09309">pdf</a>, <a href="https://arxiv.org/format/2210.09309">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> RibSeg v2: A Large-scale Benchmark for Rib Labeling and Anatomical Centerline Extraction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Jin%2C+L">Liang Jin</a>, <a href="/search/eess?searchtype=author&query=Gu%2C+S">Shixuan Gu</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Donglai Wei</a>, <a href="/search/eess?searchtype=author&query=Adhinarta%2C+J+K">Jason Ken Adhinarta</a>, <a href="/search/eess?searchtype=author&query=Kuang%2C+K">Kaiming Kuang</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Y+J">Yongjie Jessica Zhang</a>, <a href="/search/eess?searchtype=author&query=Pfister%2C+H">Hanspeter Pfister</a>, <a href="/search/eess?searchtype=author&query=Ni%2C+B">Bingbing Ni</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+J">Jiancheng Yang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+M">Ming Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.09309v4-abstract-short" style="display: inline;"> Automatic rib labeling and anatomical centerline extraction are common prerequisites for various clinical applications. Prior studies either use in-house datasets that are inaccessible to communities, or focus on rib segmentation that neglects the clinical significance of rib labeling. To address these issues, we extend our prior dataset (RibSeg) on the binary rib segmentation task to a comprehens… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.09309v4-abstract-full').style.display = 'inline'; document.getElementById('2210.09309v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.09309v4-abstract-full" style="display: none;"> Automatic rib labeling and anatomical centerline extraction are common prerequisites for various clinical applications. Prior studies either use in-house datasets that are inaccessible to communities, or focus on rib segmentation that neglects the clinical significance of rib labeling. To address these issues, we extend our prior dataset (RibSeg) on the binary rib segmentation task to a comprehensive benchmark, named RibSeg v2, with 660 CT scans (15,466 individual ribs in total) and annotations manually inspected by experts for rib labeling and anatomical centerline extraction. Based on the RibSeg v2, we develop a pipeline including deep learning-based methods for rib labeling, and a skeletonization-based method for centerline extraction. To improve computational efficiency, we propose a sparse point cloud representation of CT scans and compare it with standard dense voxel grids. Moreover, we design and analyze evaluation metrics to address the key challenges of each task. Our dataset, code, and model are available online to facilitate open research at https://github.com/M3DV/RibSeg <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.09309v4-abstract-full').style.display = 'none'; document.getElementById('2210.09309v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 6 figures, journal</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.03180">arXiv:2207.03180</a> <span> [<a href="https://arxiv.org/pdf/2207.03180">pdf</a>, <a href="https://arxiv.org/format/2207.03180">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Deformer: Towards Displacement Field Learning for Unsupervised Medical Image Registration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+J">Jiashun Chen</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+D">Donghuan Lu</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Y">Yu Zhang</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Dong Wei</a>, <a href="/search/eess?searchtype=author&query=Ning%2C+M">Munan Ning</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+X">Xinyu Shi</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+Z">Zhe Xu</a>, <a href="/search/eess?searchtype=author&query=Zheng%2C+Y">Yefeng Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.03180v1-abstract-short" style="display: inline;"> Recently, deep-learning-based approaches have been widely studied for deformable image registration task. However, most efforts directly map the composite image representation to spatial transformation through the convolutional neural network, ignoring its limited ability to capture spatial correspondence. On the other hand, Transformer can better characterize the spatial relationship with attenti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.03180v1-abstract-full').style.display = 'inline'; document.getElementById('2207.03180v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.03180v1-abstract-full" style="display: none;"> Recently, deep-learning-based approaches have been widely studied for deformable image registration task. However, most efforts directly map the composite image representation to spatial transformation through the convolutional neural network, ignoring its limited ability to capture spatial correspondence. On the other hand, Transformer can better characterize the spatial relationship with attention mechanism, its long-range dependency may be harmful to the registration task, where voxels with too large distances are unlikely to be corresponding pairs. In this study, we propose a novel Deformer module along with a multi-scale framework for the deformable image registration task. The Deformer module is designed to facilitate the mapping from image representation to spatial transformation by formulating the displacement vector prediction as the weighted summation of several bases. With the multi-scale framework to predict the displacement fields in a coarse-to-fine manner, superior performance can be achieved compared with traditional and learning-based approaches. Comprehensive experiments on two public datasets are conducted to demonstrate the effectiveness of the proposed Deformer module as well as the multi-scale framework. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.03180v1-abstract-full').style.display = 'none'; document.getElementById('2207.03180v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.02425">arXiv:2206.02425</a> <span> [<a href="https://arxiv.org/pdf/2206.02425">pdf</a>, <a href="https://arxiv.org/format/2206.02425">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> mmFormer: Multimodal Medical Transformer for Incomplete Multimodal Learning of Brain Tumor Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhang%2C+Y">Yao Zhang</a>, <a href="/search/eess?searchtype=author&query=He%2C+N">Nanjun He</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+J">Jiawei Yang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yuexiang Li</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Dong Wei</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+Y">Yawen Huang</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Y">Yang Zhang</a>, <a href="/search/eess?searchtype=author&query=He%2C+Z">Zhiqiang He</a>, <a href="/search/eess?searchtype=author&query=Zheng%2C+Y">Yefeng Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.02425v2-abstract-short" style="display: inline;"> Accurate brain tumor segmentation from Magnetic Resonance Imaging (MRI) is desirable to joint learning of multimodal images. However, in clinical practice, it is not always possible to acquire a complete set of MRIs, and the problem of missing modalities causes severe performance degradation in existing multimodal segmentation methods. In this work, we present the first attempt to exploit the Tran… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.02425v2-abstract-full').style.display = 'inline'; document.getElementById('2206.02425v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.02425v2-abstract-full" style="display: none;"> Accurate brain tumor segmentation from Magnetic Resonance Imaging (MRI) is desirable to joint learning of multimodal images. However, in clinical practice, it is not always possible to acquire a complete set of MRIs, and the problem of missing modalities causes severe performance degradation in existing multimodal segmentation methods. In this work, we present the first attempt to exploit the Transformer for multimodal brain tumor segmentation that is robust to any combinatorial subset of available modalities. Concretely, we propose a novel multimodal Medical Transformer (mmFormer) for incomplete multimodal learning with three main components: the hybrid modality-specific encoders that bridge a convolutional encoder and an intra-modal Transformer for both local and global context modeling within each modality; an inter-modal Transformer to build and align the long-range correlations across modalities for modality-invariant features with global semantics corresponding to tumor region; a decoder that performs a progressive up-sampling and fusion with the modality-invariant features to generate robust segmentation. Besides, auxiliary regularizers are introduced in both encoder and decoder to further enhance the model's robustness to incomplete modalities. We conduct extensive experiments on the public BraTS $2018$ dataset for brain tumor segmentation. The results demonstrate that the proposed mmFormer outperforms the state-of-the-art methods for incomplete multimodal brain tumor segmentation on almost all subsets of incomplete modalities, especially by an average 19.07% improvement of Dice on tumor segmentation with only one available modality. The code is available at https://github.com/YaoZhang93/mmFormer. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.02425v2-abstract-full').style.display = 'none'; document.getElementById('2206.02425v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to MICCAI 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.10595">arXiv:2205.10595</a> <span> [<a href="https://arxiv.org/pdf/2205.10595">pdf</a>, <a href="https://arxiv.org/ps/2205.10595">ps</a>, <a href="https://arxiv.org/format/2205.10595">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/978-3-642-23626-6_53">10.1007/978-3-642-23626-6_53 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Myocardial Segmentation of Late Gadolinium Enhanced MR Images by Propagation of Contours from Cine MR Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wei%2C+D">Dong Wei</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+Y">Ying Sun</a>, <a href="/search/eess?searchtype=author&query=Chai%2C+P">Ping Chai</a>, <a href="/search/eess?searchtype=author&query=Low%2C+A">Adrian Low</a>, <a href="/search/eess?searchtype=author&query=Ong%2C+S+H">Sim Heng Ong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.10595v1-abstract-short" style="display: inline;"> Automatic segmentation of myocardium in Late Gadolinium Enhanced (LGE) Cardiac MR (CMR) images is often difficult due to the intensity heterogeneity resulting from accumulation of contrast agent in infarcted areas. In this paper, we propose an automatic segmentation framework that fully utilizes shared information between corresponding cine and LGE images of a same patient. Given myocardial contou… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.10595v1-abstract-full').style.display = 'inline'; document.getElementById('2205.10595v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.10595v1-abstract-full" style="display: none;"> Automatic segmentation of myocardium in Late Gadolinium Enhanced (LGE) Cardiac MR (CMR) images is often difficult due to the intensity heterogeneity resulting from accumulation of contrast agent in infarcted areas. In this paper, we propose an automatic segmentation framework that fully utilizes shared information between corresponding cine and LGE images of a same patient. Given myocardial contours in cine CMR images, the proposed framework achieves accurate segmentation of LGE CMR images in a coarse-to-fine manner. Affine registration is first performed between the corresponding cine and LGE image pair, followed by nonrigid registration, and finally local deformation of myocardial contours driven by forces derived from local features of the LGE image. Experimental results on real patient data with expert outlined ground truth show that the proposed framework can generate accurate and reliable results for myocardial segmentation of LGE CMR images. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.10595v1-abstract-full').style.display = 'none'; document.getElementById('2205.10595v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">MICCAI 2011</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.10572">arXiv:2205.10572</a> <span> [<a href="https://arxiv.org/pdf/2205.10572">pdf</a>, <a href="https://arxiv.org/format/2205.10572">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Medical Physics">physics.med-ph</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TBME.2013.2237907">10.1109/TBME.2013.2237907 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> A Comprehensive 3-D Framework for Automatic Quantification of Late Gadolinium Enhanced Cardiac Magnetic Resonance Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wei%2C+D">Dong Wei</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+Y">Ying Sun</a>, <a href="/search/eess?searchtype=author&query=Ong%2C+S">Sim-Heng Ong</a>, <a href="/search/eess?searchtype=author&query=Chai%2C+P">Ping Chai</a>, <a href="/search/eess?searchtype=author&query=Teo%2C+L+L">Lynette L Teo</a>, <a href="/search/eess?searchtype=author&query=Low%2C+A+F">Adrian F Low</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.10572v1-abstract-short" style="display: inline;"> Late gadolinium enhanced (LGE) cardiac magnetic resonance (CMR) can directly visualize nonviable myocardium with hyperenhanced intensities with respect to normal myocardium. For heart attack patients, it is crucial to facilitate the decision of appropriate therapy by analyzing and quantifying their LGE CMR images. To achieve accurate quantification, LGE CMR images need to be processed in two steps… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.10572v1-abstract-full').style.display = 'inline'; document.getElementById('2205.10572v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.10572v1-abstract-full" style="display: none;"> Late gadolinium enhanced (LGE) cardiac magnetic resonance (CMR) can directly visualize nonviable myocardium with hyperenhanced intensities with respect to normal myocardium. For heart attack patients, it is crucial to facilitate the decision of appropriate therapy by analyzing and quantifying their LGE CMR images. To achieve accurate quantification, LGE CMR images need to be processed in two steps: segmentation of the myocardium followed by classification of infarcts within the segmented myocardium. However, automatic segmentation is difficult usually due to the intensity heterogeneity of the myocardium and intensity similarity between the infarcts and blood pool. Besides, the slices of an LGE CMR dataset often suffer from spatial and intensity distortions, causing further difficulties in segmentation and classification. In this paper, we present a comprehensive 3-D framework for automatic quantification of LGE CMR images. In this framework, myocardium is segmented with a novel method that deforms coupled endocardial and epicardial meshes and combines information in both short- and long-axis slices, while infarcts are classified with a graph-cut algorithm incorporating intensity and spatial information. Moreover, both spatial and intensity distortions are effectively corrected with specially designed countermeasures. Experiments with 20 sets of real patient data show visually good segmentation and classification results that are quantitatively in strong agreement with those manually obtained by experts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.10572v1-abstract-full').style.display = 'none'; document.getElementById('2205.10572v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">IEEE Transactions on Biomedical Engineering ( Volume: 60, Issue: 6, June 2013)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.10548">arXiv:2205.10548</a> <span> [<a href="https://arxiv.org/pdf/2205.10548">pdf</a>, <a href="https://arxiv.org/ps/2205.10548">ps</a>, <a href="https://arxiv.org/format/2205.10548">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Medical Physics">physics.med-ph</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.media.2013.03.001">10.1016/j.media.2013.03.001 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Three-Dimensional Segmentation of the Left Ventricle in Late Gadolinium Enhanced MR Images of Chronic Infarction Combining Long- and Short-Axis Information </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wei%2C+D">Dong Wei</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+Y">Ying Sun</a>, <a href="/search/eess?searchtype=author&query=Ong%2C+S">Sim-Heng Ong</a>, <a href="/search/eess?searchtype=author&query=Chai%2C+P">Ping Chai</a>, <a href="/search/eess?searchtype=author&query=Teo%2C+L+L">Lynette L. Teo</a>, <a href="/search/eess?searchtype=author&query=Low%2C+A+F">Adrian F. Low</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.10548v1-abstract-short" style="display: inline;"> Automatic segmentation of the left ventricle (LV) in late gadolinium enhanced (LGE) cardiac MR (CMR) images is difficult due to the intensity heterogeneity arising from accumulation of contrast agent in infarcted myocardium. In this paper, we present a comprehensive framework for automatic 3D segmentation of the LV in LGE CMR images. Given myocardial contours in cine images as a priori knowledge,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.10548v1-abstract-full').style.display = 'inline'; document.getElementById('2205.10548v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.10548v1-abstract-full" style="display: none;"> Automatic segmentation of the left ventricle (LV) in late gadolinium enhanced (LGE) cardiac MR (CMR) images is difficult due to the intensity heterogeneity arising from accumulation of contrast agent in infarcted myocardium. In this paper, we present a comprehensive framework for automatic 3D segmentation of the LV in LGE CMR images. Given myocardial contours in cine images as a priori knowledge, the framework initially propagates the a priori segmentation from cine to LGE images via 2D translational registration. Two meshes representing respectively endocardial and epicardial surfaces are then constructed with the propagated contours. After construction, the two meshes are deformed towards the myocardial edge points detected in both short-axis and long-axis LGE images in a unified 3D coordinate system. Taking into account the intensity characteristics of the LV in LGE images, we propose a novel parametric model of the LV for consistent myocardial edge points detection regardless of pathological status of the myocardium (infarcted or healthy) and of the type of the LGE images (short-axis or long-axis). We have evaluated the proposed framework with 21 sets of real patient and 4 sets of simulated phantom data. Both distance- and region-based performance metrics confirm the observation that the framework can generate accurate and reliable results for myocardial segmentation of LGE images. We have also tested the robustness of the framework with respect to varied a priori segmentation in both practical and simulated settings. Experimental results show that the proposed framework can greatly compensate variations in the given a priori knowledge and consistently produce accurate segmentations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.10548v1-abstract-full').style.display = 'none'; document.getElementById('2205.10548v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Medical Image Analysis, Volume 17, Issue 6, August 2013, Pages 685-697</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.02844">arXiv:2204.02844</a> <span> [<a href="https://arxiv.org/pdf/2204.02844">pdf</a>, <a href="https://arxiv.org/format/2204.02844">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Learning to Generate Realistic Noisy Images via Pixel-level Noise-aware Adversarial Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Cai%2C+Y">Yuanhao Cai</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+X">Xiaowan Hu</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+H">Haoqian Wang</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Y">Yulun Zhang</a>, <a href="/search/eess?searchtype=author&query=Pfister%2C+H">Hanspeter Pfister</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Donglai Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.02844v2-abstract-short" style="display: inline;"> Existing deep learning real denoising methods require a large amount of noisy-clean image pairs for supervision. Nonetheless, capturing a real noisy-clean dataset is an unacceptable expensive and cumbersome procedure. To alleviate this problem, this work investigates how to generate realistic noisy images. Firstly, we formulate a simple yet reasonable noise model that treats each real noisy pixel… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.02844v2-abstract-full').style.display = 'inline'; document.getElementById('2204.02844v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.02844v2-abstract-full" style="display: none;"> Existing deep learning real denoising methods require a large amount of noisy-clean image pairs for supervision. Nonetheless, capturing a real noisy-clean dataset is an unacceptable expensive and cumbersome procedure. To alleviate this problem, this work investigates how to generate realistic noisy images. Firstly, we formulate a simple yet reasonable noise model that treats each real noisy pixel as a random variable. This model splits the noisy image generation problem into two sub-problems: image domain alignment and noise domain alignment. Subsequently, we propose a novel framework, namely Pixel-level Noise-aware Generative Adversarial Network (PNGAN). PNGAN employs a pre-trained real denoiser to map the fake and real noisy images into a nearly noise-free solution space to perform image domain alignment. Simultaneously, PNGAN establishes a pixel-level adversarial training to conduct noise domain alignment. Additionally, for better noise fitting, we present an efficient architecture Simple Multi-scale Network (SMNet) as the generator. Qualitative validation shows that noise generated by PNGAN is highly similar to real noise in terms of intensity and distribution. Quantitative experiments demonstrate that a series of denoisers trained with the generated noisy images achieve state-of-the-art (SOTA) results on four real denoising benchmarks. Part of codes, pre-trained models, and results are available at https://github.com/caiyuanhao1998/PNGAN for comparisons. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.02844v2-abstract-full').style.display = 'none'; document.getElementById('2204.02844v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.05571">arXiv:2203.05571</a> <span> [<a href="https://arxiv.org/pdf/2203.05571">pdf</a>, <a href="https://arxiv.org/format/2203.05571">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1117/12.2544074">10.1117/12.2544074 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Deep Convolutional Neural Networks for Molecular Subtyping of Gliomas Using Magnetic Resonance Imaging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wei%2C+D">Dong Wei</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yiming Li</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yinyan Wang</a>, <a href="/search/eess?searchtype=author&query=Qian%2C+T">Tianyi Qian</a>, <a href="/search/eess?searchtype=author&query=Zheng%2C+Y">Yefeng Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.05571v1-abstract-short" style="display: inline;"> Knowledge of molecular subtypes of gliomas can provide valuable information for tailored therapies. This study aimed to investigate the use of deep convolutional neural networks (DCNNs) for noninvasive glioma subtyping with radiological imaging data according to the new taxonomy announced by the World Health Organization in 2016. Methods: A DCNN model was developed for the prediction of the five g… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.05571v1-abstract-full').style.display = 'inline'; document.getElementById('2203.05571v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.05571v1-abstract-full" style="display: none;"> Knowledge of molecular subtypes of gliomas can provide valuable information for tailored therapies. This study aimed to investigate the use of deep convolutional neural networks (DCNNs) for noninvasive glioma subtyping with radiological imaging data according to the new taxonomy announced by the World Health Organization in 2016. Methods: A DCNN model was developed for the prediction of the five glioma subtypes based on a hierarchical classification paradigm. This model used three parallel, weight-sharing, deep residual learning networks to process 2.5-dimensional input of trimodal MRI data, including T1-weighted, T1-weighted with contrast enhancement, and T2-weighted images. A data set comprising 1,016 real patients was collected for evaluation of the developed DCNN model. The predictive performance was evaluated via the area under the curve (AUC) from the receiver operating characteristic analysis. For comparison, the performance of a radiomics-based approach was also evaluated. Results: The AUCs of the DCNN model for the four classification tasks in the hierarchical classification paradigm were 0.89, 0.89, 0.85, and 0.66, respectively, as compared to 0.85, 0.75, 0.67, and 0.59 of the radiomics approach. Conclusion: The results showed that the developed DCNN model can predict glioma subtypes with promising performance, given sufficient, non-ill-balanced training data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.05571v1-abstract-full').style.display = 'none'; document.getElementById('2203.05571v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Proc. SPIE 11314, Medical Imaging 2020: Computer-Aided Diagnosis</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.03640">arXiv:2203.03640</a> <span> [<a href="https://arxiv.org/pdf/2203.03640">pdf</a>, <a href="https://arxiv.org/format/2203.03640">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TMI.2020.3014433">10.1109/TMI.2020.3014433 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Conquering Data Variations in Resolution: A Slice-Aware Multi-Branch Decoder Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+S">Shuxin Wang</a>, <a href="/search/eess?searchtype=author&query=Cao%2C+S">Shilei Cao</a>, <a href="/search/eess?searchtype=author&query=Chai%2C+Z">Zhizhong Chai</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Dong Wei</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+K">Kai Ma</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+L">Liansheng Wang</a>, <a href="/search/eess?searchtype=author&query=Zheng%2C+Y">Yefeng Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.03640v1-abstract-short" style="display: inline;"> Fully convolutional neural networks have made promising progress in joint liver and liver tumor segmentation. Instead of following the debates over 2D versus 3D networks (for example, pursuing the balance between large-scale 2D pretraining and 3D context), in this paper, we novelly identify the wide variation in the ratio between intra- and inter-slice resolutions as a crucial obstacle to the perf… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.03640v1-abstract-full').style.display = 'inline'; document.getElementById('2203.03640v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.03640v1-abstract-full" style="display: none;"> Fully convolutional neural networks have made promising progress in joint liver and liver tumor segmentation. Instead of following the debates over 2D versus 3D networks (for example, pursuing the balance between large-scale 2D pretraining and 3D context), in this paper, we novelly identify the wide variation in the ratio between intra- and inter-slice resolutions as a crucial obstacle to the performance. To tackle the mismatch between the intra- and inter-slice information, we propose a slice-aware 2.5D network that emphasizes extracting discriminative features utilizing not only in-plane semantics but also out-of-plane coherence for each separate slice. Specifically, we present a slice-wise multi-input multi-output architecture to instantiate such a design paradigm, which contains a Multi-Branch Decoder (MD) with a Slice-centric Attention Block (SAB) for learning slice-specific features and a Densely Connected Dice (DCD) loss to regularize the inter-slice predictions to be coherent and continuous. Based on the aforementioned innovations, we achieve state-of-the-art results on the MICCAI 2017 Liver Tumor Segmentation (LiTS) dataset. Besides, we also test our model on the ISBI 2019 Segmentation of THoracic Organs at Risk (SegTHOR) dataset, and the result proves the robustness and generalizability of the proposed method in other segmentation tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.03640v1-abstract-full').style.display = 'none'; document.getElementById('2203.03640v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published by IEEE TMI</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.02390">arXiv:2203.02390</a> <span> [<a href="https://arxiv.org/pdf/2203.02390">pdf</a>, <a href="https://arxiv.org/format/2203.02390">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/978-3-030-87237-3_11">10.1007/978-3-030-87237-3_11 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Simultaneous Alignment and Surface Regression Using Hybrid 2D-3D Networks for 3D Coherent Layer Segmentation of Retina OCT Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Liu%2C+H">Hong Liu</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Dong Wei</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+D">Donghuan Lu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yuexiang Li</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+K">Kai Ma</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+L">Liansheng Wang</a>, <a href="/search/eess?searchtype=author&query=Zheng%2C+Y">Yefeng Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.02390v1-abstract-short" style="display: inline;"> Automated surface segmentation of retinal layer is important and challenging in analyzing optical coherence tomography (OCT). Recently, many deep learning based methods have been developed for this task and yield remarkable performance. However, due to large spatial gap and potential mismatch between the B-scans of OCT data, all of them are based on 2D segmentation of individual B-scans, which may… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.02390v1-abstract-full').style.display = 'inline'; document.getElementById('2203.02390v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.02390v1-abstract-full" style="display: none;"> Automated surface segmentation of retinal layer is important and challenging in analyzing optical coherence tomography (OCT). Recently, many deep learning based methods have been developed for this task and yield remarkable performance. However, due to large spatial gap and potential mismatch between the B-scans of OCT data, all of them are based on 2D segmentation of individual B-scans, which may loss the continuity information across the B-scans. In addition, 3D surface of the retina layers can provide more diagnostic information, which is crucial in quantitative image analysis. In this study, a novel framework based on hybrid 2D-3D convolutional neural networks (CNNs) is proposed to obtain continuous 3D retinal layer surfaces from OCT. The 2D features of individual B-scans are extracted by an encoder consisting of 2D convolutions. These 2D features are then used to produce the alignment displacement field and layer segmentation by two 3D decoders, which are coupled via a spatial transformer module. The entire framework is trained end-to-end. To the best of our knowledge, this is the first study that attempts 3D retinal layer segmentation in volumetric OCT images based on CNNs. Experiments on a publicly available dataset show that our framework achieves superior results to state-of-the-art 2D methods in terms of both layer segmentation accuracy and cross-B-scan 3D continuity, thus offering more clinical values than previous works. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.02390v1-abstract-full').style.display = 'none'; document.getElementById('2203.02390v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Presented at MICCAI 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.05754">arXiv:2112.05754</a> <span> [<a href="https://arxiv.org/pdf/2112.05754">pdf</a>, <a href="https://arxiv.org/format/2112.05754">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> PyTorch Connectomics: A Scalable and Flexible Segmentation Framework for EM Connectomics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Lin%2C+Z">Zudi Lin</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Donglai Wei</a>, <a href="/search/eess?searchtype=author&query=Lichtman%2C+J">Jeff Lichtman</a>, <a href="/search/eess?searchtype=author&query=Pfister%2C+H">Hanspeter Pfister</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.05754v1-abstract-short" style="display: inline;"> We present PyTorch Connectomics (PyTC), an open-source deep-learning framework for the semantic and instance segmentation of volumetric microscopy images, built upon PyTorch. We demonstrate the effectiveness of PyTC in the field of connectomics, which aims to segment and reconstruct neurons, synapses, and other organelles like mitochondria at nanometer resolution for understanding neuronal communi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.05754v1-abstract-full').style.display = 'inline'; document.getElementById('2112.05754v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.05754v1-abstract-full" style="display: none;"> We present PyTorch Connectomics (PyTC), an open-source deep-learning framework for the semantic and instance segmentation of volumetric microscopy images, built upon PyTorch. We demonstrate the effectiveness of PyTC in the field of connectomics, which aims to segment and reconstruct neurons, synapses, and other organelles like mitochondria at nanometer resolution for understanding neuronal communication, metabolism, and development in animal brains. PyTC is a scalable and flexible toolbox that tackles datasets at different scales and supports multi-task and semi-supervised learning to better exploit expensive expert annotations and the vast amount of unlabeled data during training. Those functionalities can be easily realized in PyTC by changing the configuration options without coding and adapted to other 2D and 3D segmentation tasks for different tissues and imaging modalities. Quantitatively, our framework achieves the best performance in the CREMI challenge for synaptic cleft segmentation (outperforms existing best result by relatively 6.1$\%$) and competitive performance on mitochondria and neuronal nuclei segmentation. Code and tutorials are publicly available at https://connectomics.readthedocs.io. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.05754v1-abstract-full').style.display = 'none'; document.getElementById('2112.05754v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Technical report</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.14795">arXiv:2110.14795</a> <span> [<a href="https://arxiv.org/pdf/2110.14795">pdf</a>, <a href="https://arxiv.org/format/2110.14795">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1038/s41597-022-01721-8">10.1038/s41597-022-01721-8 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> MedMNIST v2 -- A large-scale lightweight benchmark for 2D and 3D biomedical image classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Yang%2C+J">Jiancheng Yang</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+R">Rui Shi</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Donglai Wei</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Z">Zequan Liu</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+L">Lin Zhao</a>, <a href="/search/eess?searchtype=author&query=Ke%2C+B">Bilian Ke</a>, <a href="/search/eess?searchtype=author&query=Pfister%2C+H">Hanspeter Pfister</a>, <a href="/search/eess?searchtype=author&query=Ni%2C+B">Bingbing Ni</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.14795v2-abstract-short" style="display: inline;"> We introduce MedMNIST v2, a large-scale MNIST-like dataset collection of standardized biomedical images, including 12 datasets for 2D and 6 datasets for 3D. All images are pre-processed into a small size of 28x28 (2D) or 28x28x28 (3D) with the corresponding classification labels so that no background knowledge is required for users. Covering primary data modalities in biomedical images, MedMNIST v… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.14795v2-abstract-full').style.display = 'inline'; document.getElementById('2110.14795v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.14795v2-abstract-full" style="display: none;"> We introduce MedMNIST v2, a large-scale MNIST-like dataset collection of standardized biomedical images, including 12 datasets for 2D and 6 datasets for 3D. All images are pre-processed into a small size of 28x28 (2D) or 28x28x28 (3D) with the corresponding classification labels so that no background knowledge is required for users. Covering primary data modalities in biomedical images, MedMNIST v2 is designed to perform classification on lightweight 2D and 3D images with various dataset scales (from 100 to 100,000) and diverse tasks (binary/multi-class, ordinal regression, and multi-label). The resulting dataset, consisting of 708,069 2D images and 10,214 3D images in total, could support numerous research / educational purposes in biomedical image analysis, computer vision, and machine learning. We benchmark several baseline methods on MedMNIST v2, including 2D / 3D neural networks and open-source / commercial AutoML tools. The data and code are publicly available at https://medmnist.com/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.14795v2-abstract-full').style.display = 'none'; document.getElementById('2110.14795v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The data and code are publicly available at https://medmnist.com/. arXiv admin note: text overlap with arXiv:2010.14925</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Scientific Data 2023 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.14805">arXiv:2109.14805</a> <span> [<a href="https://arxiv.org/pdf/2109.14805">pdf</a>, <a href="https://arxiv.org/format/2109.14805">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Unsupervised Landmark Detection Based Spatiotemporal Motion Estimation for 4D Dynamic Medical Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Guo%2C+Y">Yuyu Guo</a>, <a href="/search/eess?searchtype=author&query=Bi%2C+L">Lei Bi</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Dongming Wei</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+L">Liyun Chen</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+Z">Zhengbin Zhu</a>, <a href="/search/eess?searchtype=author&query=Feng%2C+D">Dagan Feng</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+R">Ruiyan Zhang</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Q">Qian Wang</a>, <a href="/search/eess?searchtype=author&query=Kim%2C+J">Jinman Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.14805v3-abstract-short" style="display: inline;"> Motion estimation is a fundamental step in dynamic medical image processing for the assessment of target organ anatomy and function. However, existing image-based motion estimation methods, which optimize the motion field by evaluating the local image similarity, are prone to produce implausible estimation, especially in the presence of large motion. In this study, we provide a novel motion estima… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.14805v3-abstract-full').style.display = 'inline'; document.getElementById('2109.14805v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.14805v3-abstract-full" style="display: none;"> Motion estimation is a fundamental step in dynamic medical image processing for the assessment of target organ anatomy and function. However, existing image-based motion estimation methods, which optimize the motion field by evaluating the local image similarity, are prone to produce implausible estimation, especially in the presence of large motion. In this study, we provide a novel motion estimation framework of Dense-Sparse-Dense (DSD), which comprises two stages. In the first stage, we process the raw dense image to extract sparse landmarks to represent the target organ anatomical topology and discard the redundant information that is unnecessary for motion estimation. For this purpose, we introduce an unsupervised 3D landmark detection network to extract spatially sparse but representative landmarks for the target organ motion estimation. In the second stage, we derive the sparse motion displacement from the extracted sparse landmarks of two images of different time points. Then, we present a motion reconstruction network to construct the motion field by projecting the sparse landmarks displacement back into the dense image domain. Furthermore, we employ the estimated motion field from our two-stage DSD framework as initialization and boost the motion estimation quality in light-weight yet effective iterative optimization. We evaluate our method on two dynamic medical imaging tasks to model cardiac motion and lung respiratory motion, respectively. Our method has produced superior motion estimation accuracy compared to existing comparative methods. Besides, the extensive experimental results demonstrate that our solution can extract well representative anatomical landmarks without any requirement of manual annotation. Our code is publicly available online. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.14805v3-abstract-full').style.display = 'none'; document.getElementById('2109.14805v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted by IEEE Transactions on Cybernetics</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.11715">arXiv:2109.11715</a> <span> [<a href="https://arxiv.org/pdf/2109.11715">pdf</a>, <a href="https://arxiv.org/format/2109.11715">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Training Automatic View Planner for Cardiac MR Imaging via Self-Supervision by Spatial Relationship between Views </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wei%2C+D">Dong Wei</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+K">Kai Ma</a>, <a href="/search/eess?searchtype=author&query=Zheng%2C+Y">Yefeng Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.11715v1-abstract-short" style="display: inline;"> View planning for the acquisition of cardiac magnetic resonance imaging (CMR) requires acquaintance with the cardiac anatomy and remains a challenging task in clinical practice. Existing approaches to its automation relied either on an additional volumetric image not typically acquired in clinic routine, or on laborious manual annotations of cardiac structural landmarks. This work presents a clini… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.11715v1-abstract-full').style.display = 'inline'; document.getElementById('2109.11715v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.11715v1-abstract-full" style="display: none;"> View planning for the acquisition of cardiac magnetic resonance imaging (CMR) requires acquaintance with the cardiac anatomy and remains a challenging task in clinical practice. Existing approaches to its automation relied either on an additional volumetric image not typically acquired in clinic routine, or on laborious manual annotations of cardiac structural landmarks. This work presents a clinic-compatible and annotation-free system for automatic CMR view planning. The system mines the spatial relationship -- more specifically, locates and exploits the intersecting lines -- between the source and target views, and trains deep networks to regress heatmaps defined by these intersecting lines. As the spatial relationship is self-contained in properly stored data, e.g., in the DICOM format, the need for manual annotation is eliminated. Then, a multi-view planning strategy is proposed to aggregate information from the predicted heatmaps for all the source views of a target view, for a globally optimal prescription. The multi-view aggregation mimics the similar strategy practiced by skilled human prescribers. Experimental results on 181 clinical CMR exams show that our system achieves superior accuracy to existing approaches including conventional atlas-based and newer deep learning based ones, in prescribing four standard CMR views. The mean angle difference and point-to-plane distance evaluated against the ground truth planes are 5.98 degrees and 3.48 mm, respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.11715v1-abstract-full').style.display = 'none'; document.getElementById('2109.11715v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by MICCAI 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.09521">arXiv:2109.09521</a> <span> [<a href="https://arxiv.org/pdf/2109.09521">pdf</a>, <a href="https://arxiv.org/format/2109.09521">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/978-3-030-87193-2_58">10.1007/978-3-030-87193-2_58 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> RibSeg Dataset and Strong Point Cloud Baselines for Rib Segmentation from CT Scans </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Yang%2C+J">Jiancheng Yang</a>, <a href="/search/eess?searchtype=author&query=Gu%2C+S">Shixuan Gu</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Donglai Wei</a>, <a href="/search/eess?searchtype=author&query=Pfister%2C+H">Hanspeter Pfister</a>, <a href="/search/eess?searchtype=author&query=Ni%2C+B">Bingbing Ni</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.09521v1-abstract-short" style="display: inline;"> Manual rib inspections in computed tomography (CT) scans are clinically critical but labor-intensive, as 24 ribs are typically elongated and oblique in 3D volumes. Automatic rib segmentation methods can speed up the process through rib measurement and visualization. However, prior arts mostly use in-house labeled datasets that are publicly unavailable and work on dense 3D volumes that are computat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.09521v1-abstract-full').style.display = 'inline'; document.getElementById('2109.09521v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.09521v1-abstract-full" style="display: none;"> Manual rib inspections in computed tomography (CT) scans are clinically critical but labor-intensive, as 24 ribs are typically elongated and oblique in 3D volumes. Automatic rib segmentation methods can speed up the process through rib measurement and visualization. However, prior arts mostly use in-house labeled datasets that are publicly unavailable and work on dense 3D volumes that are computationally inefficient. To address these issues, we develop a labeled rib segmentation benchmark, named \emph{RibSeg}, including 490 CT scans (11,719 individual ribs) from a public dataset. For ground truth generation, we used existing morphology-based algorithms and manually refined its results. Then, considering the sparsity of ribs in 3D volumes, we thresholded and sampled sparse voxels from the input and designed a point cloud-based baseline method for rib segmentation. The proposed method achieves state-of-the-art segmentation performance (Dice~$\approx95\%$) with significant efficiency ($10\sim40\times$ faster than prior arts). The RibSeg dataset, code, and model in PyTorch are available at https://github.com/M3DV/RibSeg. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.09521v1-abstract-full').style.display = 'none'; document.getElementById('2109.09521v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">MICCAI 2021. The dataset, code, and model are available at https://github.com/M3DV/RibSeg</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2108.07979">arXiv:2108.07979</a> <span> [<a href="https://arxiv.org/pdf/2108.07979">pdf</a>, <a href="https://arxiv.org/format/2108.07979">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> A New Bidirectional Unsupervised Domain Adaptation Segmentation Framework </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Ning%2C+M">Munan Ning</a>, <a href="/search/eess?searchtype=author&query=Bian%2C+C">Cheng Bian</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Dong Wei</a>, <a href="/search/eess?searchtype=author&query=Yuan%2C+C">Chenglang Yuan</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yaohua Wang</a>, <a href="/search/eess?searchtype=author&query=Guo%2C+Y">Yang Guo</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+K">Kai Ma</a>, <a href="/search/eess?searchtype=author&query=Zheng%2C+Y">Yefeng Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2108.07979v1-abstract-short" style="display: inline;"> Domain shift happens in cross-domain scenarios commonly because of the wide gaps between different domains: when applying a deep learning model well-trained in one domain to another target domain, the model usually performs poorly. To tackle this problem, unsupervised domain adaptation (UDA) techniques are proposed to bridge the gap between different domains, for the purpose of improving model per… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.07979v1-abstract-full').style.display = 'inline'; document.getElementById('2108.07979v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2108.07979v1-abstract-full" style="display: none;"> Domain shift happens in cross-domain scenarios commonly because of the wide gaps between different domains: when applying a deep learning model well-trained in one domain to another target domain, the model usually performs poorly. To tackle this problem, unsupervised domain adaptation (UDA) techniques are proposed to bridge the gap between different domains, for the purpose of improving model performance without annotation in the target domain. Particularly, UDA has a great value for multimodal medical image analysis, where annotation difficulty is a practical concern. However, most existing UDA methods can only achieve satisfactory improvements in one adaptation direction (e.g., MRI to CT), but often perform poorly in the other (CT to MRI), limiting their practical usage. In this paper, we propose a bidirectional UDA (BiUDA) framework based on disentangled representation learning for equally competent two-way UDA performances. This framework employs a unified domain-aware pattern encoder which not only can adaptively encode images in different domains through a domain controller, but also improve model efficiency by eliminating redundant parameters. Furthermore, to avoid distortion of contents and patterns of input images during the adaptation process, a content-pattern consistency loss is introduced. Additionally, for better UDA segmentation performance, a label consistency strategy is proposed to provide extra supervision by recomposing target-domain-styled images and corresponding source-domain annotations. Comparison experiments and ablation studies conducted on two public datasets demonstrate the superiority of our BiUDA framework to current state-of-the-art UDA methods and the effectiveness of its novel designs. By successfully addressing two-way adaptations, our BiUDA framework offers a flexible solution of UDA techniques to the real-world scenario. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.07979v1-abstract-full').style.display = 'none'; document.getElementById('2108.07979v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 August, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">IPMI 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2005.07701">arXiv:2005.07701</a> <span> [<a href="https://arxiv.org/pdf/2005.07701">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optics">physics.optics</span> </div> </div> <p class="title is-5 mathjax"> Optical image decomposition and noise filtering based on Laguerre-Gaussian modes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Ma%2C+J">Jiantao Ma</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Dan Wei</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+H">Haocheng Yang</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Y">Yong Zhang</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+M">Min Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2005.07701v1-abstract-short" style="display: inline;"> We propose and experimentally demonstrate an efficient image decomposition in the Laguerre-Gaussian (LG) domain. By developing an advanced computing method, the sampling points are much fewer than those in the existing methods, which can significantly improve the calculation efficiency. The beam waist, azimuthal and radial truncation orders of the LG modes are optimized depending on the image info… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.07701v1-abstract-full').style.display = 'inline'; document.getElementById('2005.07701v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2005.07701v1-abstract-full" style="display: none;"> We propose and experimentally demonstrate an efficient image decomposition in the Laguerre-Gaussian (LG) domain. By developing an advanced computing method, the sampling points are much fewer than those in the existing methods, which can significantly improve the calculation efficiency. The beam waist, azimuthal and radial truncation orders of the LG modes are optimized depending on the image information to be restored. In the experiment, we decompose an image by using about 30000 LG modes and realize a high-fidelity reconstruction. Furthermore, we show image noise reduction through LG domain filtering. Our results open a door for LG-mode based image processing. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.07701v1-abstract-full').style.display = 'none'; document.getElementById('2005.07701v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 May, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2005.07428">arXiv:2005.07428</a> <span> [<a href="https://arxiv.org/pdf/2005.07428">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optics">physics.optics</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1364/OE.403521">10.1364/OE.403521 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Laguerre-Gaussian transform for rotating image processing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wei%2C+D">Dan Wei</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+J">Jiantao Ma</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+T">Tianxin Wang</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+C">Chuan Xu</a>, <a href="/search/eess?searchtype=author&query=Cai%2C+Y">Yin Cai</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+L">Lidan Zhang</a>, <a href="/search/eess?searchtype=author&query=Fang%2C+X">Xinyuan Fang</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Dunzhao Wei</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+S">Shining Zhu</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Y">Yong Zhang</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+M">Min Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2005.07428v1-abstract-short" style="display: inline;"> Rotation is a common motional form in nature, existing from atoms and molecules, industrial turbines to astronomical objects. However, it still lacks an efficient and reliable method for real-time image processing of a fast-rotating object. Since the Fourier spectrum of a rotating object changes rapidly, the traditional Fourier transform (FT) techniques become extremely complicated and time consum… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.07428v1-abstract-full').style.display = 'inline'; document.getElementById('2005.07428v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2005.07428v1-abstract-full" style="display: none;"> Rotation is a common motional form in nature, existing from atoms and molecules, industrial turbines to astronomical objects. However, it still lacks an efficient and reliable method for real-time image processing of a fast-rotating object. Since the Fourier spectrum of a rotating object changes rapidly, the traditional Fourier transform (FT) techniques become extremely complicated and time consuming. Here, we propose a Laguerre-Gaussian (LG) transform to analyze the rotating object with LG-mode basis. The rotation operation provides a feasible way to acquire LG spectrum, which is similar to the function of lens in FT. Particularly, the obtained LG spectrum does not change even the object working at a high rotating speed. By analyzing the LG spectrum, one can perform image processing such as reconstruction, edge enhancement, and pattern replication. Such LG transform provides an efficient and convenient way to real-time monitor and analyze a fast-rotating object in scientific research and industry. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.07428v1-abstract-full').style.display = 'none'; document.getElementById('2005.07428v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 May, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2001.03857">arXiv:2001.03857</a> <span> [<a href="https://arxiv.org/pdf/2001.03857">pdf</a>, <a href="https://arxiv.org/format/2001.03857">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Robust Brain Magnetic Resonance Image Segmentation for Hydrocephalus Patients: Hard and Soft Attention </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Ren%2C+X">Xuhua Ren</a>, <a href="/search/eess?searchtype=author&query=Huo%2C+J">Jiayu Huo</a>, <a href="/search/eess?searchtype=author&query=Xuan%2C+K">Kai Xuan</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Dongming Wei</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+L">Lichi Zhang</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Q">Qian Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2001.03857v1-abstract-short" style="display: inline;"> Brain magnetic resonance (MR) segmentation for hydrocephalus patients is considered as a challenging work. Encoding the variation of the brain anatomical structures from different individuals cannot be easily achieved. The task becomes even more difficult especially when the image data from hydrocephalus patients are considered, which often have large deformations and differ significantly from the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2001.03857v1-abstract-full').style.display = 'inline'; document.getElementById('2001.03857v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2001.03857v1-abstract-full" style="display: none;"> Brain magnetic resonance (MR) segmentation for hydrocephalus patients is considered as a challenging work. Encoding the variation of the brain anatomical structures from different individuals cannot be easily achieved. The task becomes even more difficult especially when the image data from hydrocephalus patients are considered, which often have large deformations and differ significantly from the normal subjects. Here, we propose a novel strategy with hard and soft attention modules to solve the segmentation problems for hydrocephalus MR images. Our main contributions are three-fold: 1) the hard-attention module generates coarse segmentation map using multi-atlas-based method and the VoxelMorph tool, which guides subsequent segmentation process and improves its robustness; 2) the soft-attention module incorporates position attention to capture precise context information, which further improves the segmentation accuracy; 3) we validate our method by segmenting insula, thalamus and many other regions-of-interests (ROIs) that are critical to quantify brain MR images of hydrocephalus patients in real clinical scenario. The proposed method achieves much improved robustness and accuracy when segmenting all 17 consciousness-related ROIs with high variations for different subjects. To the best of our knowledge, this is the first work to employ deep learning for solving the brain segmentation problems of hydrocephalus patients. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2001.03857v1-abstract-full').style.display = 'none'; document.getElementById('2001.03857v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 January, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ISBI 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1907.13020">arXiv:1907.13020</a> <span> [<a href="https://arxiv.org/pdf/1907.13020">pdf</a>, <a href="https://arxiv.org/format/1907.13020">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Synthesis and Inpainting-Based MR-CT Registration for Image-Guided Thermal Ablation of Liver Tumors </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wei%2C+D">Dongming Wei</a>, <a href="/search/eess?searchtype=author&query=Ahmad%2C+S">Sahar Ahmad</a>, <a href="/search/eess?searchtype=author&query=Huo%2C+J">Jiayu Huo</a>, <a href="/search/eess?searchtype=author&query=Peng%2C+W">Wen Peng</a>, <a href="/search/eess?searchtype=author&query=Ge%2C+Y">Yunhao Ge</a>, <a href="/search/eess?searchtype=author&query=Xue%2C+Z">Zhong Xue</a>, <a href="/search/eess?searchtype=author&query=Yap%2C+P">Pew-Thian Yap</a>, <a href="/search/eess?searchtype=author&query=Li%2C+W">Wentao Li</a>, <a href="/search/eess?searchtype=author&query=Shen%2C+D">Dinggang Shen</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Q">Qian Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1907.13020v1-abstract-short" style="display: inline;"> Thermal ablation is a minimally invasive procedure for treat-ing small or unresectable tumors. Although CT is widely used for guiding ablation procedures, the contrast of tumors against surrounding normal tissues in CT images is often poor, aggravating the difficulty in accurate thermal ablation. In this paper, we propose a fast MR-CT image registration method to overlay a pre-procedural MR (pMR)… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1907.13020v1-abstract-full').style.display = 'inline'; document.getElementById('1907.13020v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1907.13020v1-abstract-full" style="display: none;"> Thermal ablation is a minimally invasive procedure for treat-ing small or unresectable tumors. Although CT is widely used for guiding ablation procedures, the contrast of tumors against surrounding normal tissues in CT images is often poor, aggravating the difficulty in accurate thermal ablation. In this paper, we propose a fast MR-CT image registration method to overlay a pre-procedural MR (pMR) image onto an intra-procedural CT (iCT) image for guiding the thermal ablation of liver tumors. By first using a Cycle-GAN model with mutual information constraint to generate synthesized CT (sCT) image from the cor-responding pMR, pre-procedural MR-CT image registration is carried out through traditional mono-modality CT-CT image registration. At the intra-procedural stage, a partial-convolution-based network is first used to inpaint the probe and its artifacts in the iCT image. Then, an unsupervised registration network is used to efficiently align the pre-procedural CT (pCT) with the inpainted iCT (inpCT) image. The final transformation from pMR to iCT is obtained by combining the two estimated transformations,i.e., (1) from the pMR image space to the pCT image space (through sCT) and (2) from the pCT image space to the iCT image space (through inpCT). Experimental results confirm that the proposed method achieves high registration accuracy with a very fast computational speed. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1907.13020v1-abstract-full').style.display = 'none'; document.getElementById('1907.13020v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 July, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in MICCAI 2019</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1906.02031">arXiv:1906.02031</a> <span> [<a href="https://arxiv.org/pdf/1906.02031">pdf</a>, <a href="https://arxiv.org/ps/1906.02031">ps</a>, <a href="https://arxiv.org/format/1906.02031">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> OctopusNet: A Deep Learning Segmentation Network for Multi-modal Medical Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yu Chen</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+J">Jiawei Chen</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Dong Wei</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yuexiang Li</a>, <a href="/search/eess?searchtype=author&query=Zheng%2C+Y">Yefeng Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1906.02031v2-abstract-short" style="display: inline;"> Deep learning models, such as the fully convolutional network (FCN), have been widely used in 3D biomedical segmentation and achieved state-of-the-art performance. Multiple modalities are often used for disease diagnosis and quantification. Two approaches are widely used in the literature to fuse multiple modalities in the segmentation networks: early-fusion (which stacks multiple modalities as di… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1906.02031v2-abstract-full').style.display = 'inline'; document.getElementById('1906.02031v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1906.02031v2-abstract-full" style="display: none;"> Deep learning models, such as the fully convolutional network (FCN), have been widely used in 3D biomedical segmentation and achieved state-of-the-art performance. Multiple modalities are often used for disease diagnosis and quantification. Two approaches are widely used in the literature to fuse multiple modalities in the segmentation networks: early-fusion (which stacks multiple modalities as different input channels) and late-fusion (which fuses the segmentation results from different modalities at the very end). These fusion methods easily suffer from the cross-modal interference caused by the input modalities which have wide variations. To address the problem, we propose a novel deep learning architecture, namely OctopusNet, to better leverage and fuse the information contained in multi-modalities. The proposed framework employs a separate encoder for each modality for feature extraction and exploits a hyper-fusion decoder to fuse the extracted features while avoiding feature explosion. We evaluate the proposed OctopusNet on two publicly available datasets, i.e. ISLES-2018 and MRBrainS-2013. The experimental results show that our framework outperforms the commonly-used feature fusion approaches and yields the state-of-the-art segmentation accuracy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1906.02031v2-abstract-full').style.display = 'none'; document.getElementById('1906.02031v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 August, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 June, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1401.2181">arXiv:1401.2181</a> <span> [<a href="https://arxiv.org/pdf/1401.2181">pdf</a>, <a href="https://arxiv.org/ps/1401.2181">ps</a>, <a href="https://arxiv.org/format/1401.2181">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> A biologically inspired model for transshipment problem </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Gao%2C+C">Cai Gao</a>, <a href="/search/eess?searchtype=author&query=Yan%2C+C">Chao Yan</a>, <a href="/search/eess?searchtype=author&query=Wei%2C+D">Daijun Wei</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+Y">Yong Hu</a>, <a href="/search/eess?searchtype=author&query=Mahadevan%2C+S">Sankaran Mahadevan</a>, <a href="/search/eess?searchtype=author&query=Deng%2C+Y">Yong Deng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1401.2181v1-abstract-short" style="display: inline;"> Transshipment problem is one of the basic operational research problems. In this paper, our first work is to develop a biologically inspired mathematical model for a dynamical system, which is first used to solve minimum cost flow problem. It has lower computational complexity than Physarum Solver. Second, we apply the proposed model to solve the traditional transshipment problem. Compared with th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1401.2181v1-abstract-full').style.display = 'inline'; document.getElementById('1401.2181v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1401.2181v1-abstract-full" style="display: none;"> Transshipment problem is one of the basic operational research problems. In this paper, our first work is to develop a biologically inspired mathematical model for a dynamical system, which is first used to solve minimum cost flow problem. It has lower computational complexity than Physarum Solver. Second, we apply the proposed model to solve the traditional transshipment problem. Compared with the conditional methods, experiment results show the provided model is simple, effective as well as handling problem in a continuous manner. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1401.2181v1-abstract-full').style.display = 'none'; document.getElementById('1401.2181v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 January, 2014; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2014. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">4 pages, 2 figures</span> </p> </li> </ol> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository