Search | arXiv e-print repository
Showing 1–50 of 139 results for author: Bagci, U

Sorted by announcement date (newest first); 50 results per page.
1. arXiv:2411.10071 (https://arxiv.org/abs/2411.10071) [cs.CV, cs.AI, cs.LG]
   Title: Evidential Federated Learning for Skin Lesion Image Classification
   Authors: Rutger Hendrix, Federica Proietto Salanitri, Concetto Spampinato, Simone Palazzo, Ulas Bagci
   Abstract: We introduce FedEvPrompt, a federated learning approach that integrates principles of evidential deep learning, prompt tuning, and knowledge distillation for distributed skin lesion classification. FedEvPrompt leverages two sets of prompts: b-prompts (for low-level basic visual knowledge) and t-prompts (for task-specific knowledge) prepended to frozen pre-trained Vision Transformer (ViT) models trained in an evidential learning framework to maximize class evidences. Crucially, knowledge sharing across federation clients is achieved only through knowledge distillation on attention maps generated by the local ViT models, ensuring enhanced privacy preservation compared to traditional parameter or synthetic image sharing methodologies. FedEvPrompt is optimized within a round-based learning paradigm, where each round involves training local models followed by attention map sharing with all federation clients. Experimental validation conducted in a real distributed setting, on the ISIC2019 dataset, demonstrates the superior performance of FedEvPrompt against baseline federated learning algorithms and knowledge distillation methods, without sharing model parameters. In conclusion, FedEvPrompt offers a promising approach for federated learning, effectively addressing challenges such as data heterogeneity, imbalance, privacy preservation, and knowledge sharing.
   Submitted 15 November, 2024; originally announced November 2024.
   Comments: Published as a conference paper at ICPR 2024
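A note for readers new to the evidential component above: "maximize class evidences" typically means a Dirichlet-based objective, and the only cross-client signal here is an attention-map matching term. The sketch below illustrates both under assumed names (evidential_loss, attention_distillation_loss) and a softplus evidence head; it is a minimal illustration in the style of Sensoy et al. (2018), not the paper's code.

```python
import torch
import torch.nn.functional as F

def evidential_loss(logits, targets, num_classes):
    """Type-II maximum-likelihood loss for a Dirichlet over class
    probabilities: the head outputs non-negative "evidence" per class
    instead of probabilities."""
    evidence = F.softplus(logits)            # evidence >= 0
    alpha = evidence + 1.0                   # Dirichlet concentration
    strength = alpha.sum(-1, keepdim=True)   # total Dirichlet strength
    y = F.one_hot(targets, num_classes).float()
    # expected negative log-likelihood under the Dirichlet
    nll = (y * (torch.digamma(strength) - torch.digamma(alpha))).sum(-1)
    return nll.mean()

def attention_distillation_loss(local_attn, shared_attn):
    """Clients exchange only attention maps, so the federation signal
    reduces to matching local maps against the shared ones."""
    return F.mse_loss(local_attn, shared_attn)
```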
2. arXiv:2411.05697 (https://arxiv.org/abs/2411.05697) [eess.IV, cs.DC, cs.LG]
   Title: IPMN Risk Assessment under Federated Learning Paradigm
   Authors: Hongyi Pan, Ziliang Hong, Gorkem Durak, Elif Keles, Halil Ertugrul Aktas, Yavuz Taktak, Alpay Medetalibeyoglu, Zheyuan Zhang, Yury Velichko, Concetto Spampinato, Ivo Schoots, Marco J. Bruno, Pallavi Tiwari, Candice Bolan, Tamas Gonda, Frank Miller, Rajesh N. Keswani, Michael B. Wallace, Ziyue Xu, Ulas Bagci
   Abstract: Accurate classification of Intraductal Papillary Mucinous Neoplasms (IPMN) is essential for identifying high-risk cases that require timely intervention. In this study, we develop a federated learning framework for multi-center IPMN classification utilizing a comprehensive pancreas MRI dataset. This dataset includes 653 T1-weighted and 656 T2-weighted MRI images, accompanied by corresponding IPMN risk scores from 7 leading medical institutions, making it the largest and most diverse dataset for IPMN classification to date. We assess the performance of DenseNet-121 in both centralized and federated settings for training on distributed data. Our results demonstrate that the federated learning approach achieves high classification accuracy comparable to centralized learning while ensuring data privacy across institutions. This work marks a significant advancement in collaborative IPMN classification, facilitating secure and high-accuracy model training across multiple centers.
   Submitted 8 November, 2024; originally announced November 2024.
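The federated baseline described above amounts to standard FedAvg communication rounds around a DenseNet-121. A minimal sketch follows, assuming sample-size-weighted averaging and a three-level risk head (both assumptions; the abstract does not specify them):

```python
import copy
import torch
from torchvision.models import densenet121

def fedavg(client_states, client_sizes):
    """Sample-size-weighted average of client state_dicts (FedAvg)."""
    total = sum(client_sizes)
    avg = copy.deepcopy(client_states[0])
    for key in avg:
        if avg[key].dtype.is_floating_point:
            avg[key] = sum(sd[key] * (n / total)
                           for sd, n in zip(client_states, client_sizes))
        # integer buffers (e.g., BatchNorm's num_batches_tracked) are
        # simply inherited from the first client
    return avg

# one communication round (sketch): each center trains a copy locally,
# then the server aggregates and redistributes the averaged weights
global_model = densenet121(num_classes=3)  # assumed: 3 IPMN risk levels
client_states = [copy.deepcopy(global_model.state_dict()) for _ in range(7)]
client_sizes = [93, 94, 93, 93, 94, 93, 93]  # illustrative per-center counts
global_model.load_state_dict(fedavg(client_states, client_sizes))
```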
3. arXiv:2411.01390 (https://arxiv.org/abs/2411.01390) [cs.CV, eess.IV]
   Title: A New Logic For Pediatric Brain Tumor Segmentation
   Authors: Max Bengtsson, Elif Keles, Gorkem Durak, Syed Anwar, Yuri S. Velichko, Marius G. Linguraru, Angela J. Waanders, Ulas Bagci
   Abstract: In this paper, we present a novel approach for segmenting pediatric brain tumors using a deep learning architecture, inspired by expert radiologists' segmentation strategies. Our model delineates four distinct tumor labels and is benchmarked on a held-out PED BraTS 2024 test set (i.e., pediatric brain tumor datasets introduced by BraTS). Furthermore, we evaluate our model's performance against the state-of-the-art (SOTA) model using a new external dataset of 30 patients from CBTN (Children's Brain Tumor Network), labeled in accordance with the PED BraTS 2024 guidelines. We compare segmentation outcomes with the winning algorithm from the PED BraTS 2023 challenge as the SOTA model. Our proposed algorithm achieved an average Dice score of 0.642 and an HD95 of 73.0 mm on the CBTN test data, outperforming the SOTA model, which achieved a Dice score of 0.626 and an HD95 of 84.0 mm. Our results indicate that the proposed model is a step towards providing more accurate segmentation for pediatric brain tumors, which is essential for evaluating therapy response and monitoring patient progress.
   Submitted 2 November, 2024; originally announced November 2024.
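Since this entry reports Dice and HD95, here is one common way to compute both for binary masks. This is a generic sketch; per-label evaluation and anisotropic voxel spacing handling follow BraTS convention, not necessarily this paper's exact pipeline:

```python
import numpy as np
from scipy.ndimage import binary_erosion, distance_transform_edt

def dice(pred, gt):
    """Overlap metric: 2|A∩B| / (|A|+|B|)."""
    pred, gt = pred.astype(bool), gt.astype(bool)
    denom = pred.sum() + gt.sum()
    return 2.0 * np.logical_and(pred, gt).sum() / denom if denom else 1.0

def hd95(pred, gt, spacing=1.0):
    """95th-percentile symmetric surface distance between two masks."""
    surf_p = pred.astype(bool) ^ binary_erosion(pred.astype(bool))
    surf_g = gt.astype(bool) ^ binary_erosion(gt.astype(bool))
    # distance of every voxel to the opposite surface, read off on surfaces
    d_to_g = distance_transform_edt(~surf_g) * spacing
    d_to_p = distance_transform_edt(~surf_p) * spacing
    return np.percentile(np.concatenate([d_to_g[surf_p], d_to_p[surf_g]]), 95)
```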
4. arXiv:2410.22530 (https://arxiv.org/abs/2410.22530) [eess.IV, cs.CV, cs.DC]
   Title: Adaptive Aggregation Weights for Federated Segmentation of Pancreas MRI
   Authors: Hongyi Pan, Gorkem Durak, Zheyuan Zhang, Yavuz Taktak, Elif Keles, Halil Ertugrul Aktas, Alpay Medetalibeyoglu, Yury Velichko, Concetto Spampinato, Ivo Schoots, Marco J. Bruno, Rajesh N. Keswani, Pallavi Tiwari, Candice Bolan, Tamas Gonda, Michael G. Goggins, Michael B. Wallace, Ziyue Xu, Ulas Bagci
   Abstract: Federated learning (FL) enables collaborative model training across institutions without sharing sensitive data, making it an attractive solution for medical imaging tasks. However, traditional FL methods, such as Federated Averaging (FedAvg), face difficulties in generalizing across domains due to variations in imaging protocols and patient demographics across institutions. This challenge is particularly evident in pancreas MRI segmentation, where anatomical variability and imaging artifacts significantly impact performance. In this paper, we conduct a comprehensive evaluation of FL algorithms for pancreas MRI segmentation and introduce a novel approach that incorporates adaptive aggregation weights. By dynamically adjusting the contribution of each client during model aggregation, our method accounts for domain-specific differences and improves generalization across heterogeneous datasets. Experimental results demonstrate that our approach enhances segmentation accuracy and reduces the impact of domain shift compared to conventional FL methods while maintaining privacy-preserving capabilities. Significant performance improvements are observed across multiple hospitals (centers).
   Submitted 31 October, 2024; v1 submitted 29 October, 2024; originally announced October 2024.
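The core idea above, aggregation weights that adapt each round instead of staying fixed at sample-size ratios, can be sketched as follows. Deriving the weights from per-client validation losses via a softmax is an assumption for illustration, not the paper's stated rule:

```python
import torch

def adaptive_weights(client_val_losses, temperature=1.0):
    """Clients whose models generalize better this round receive larger
    aggregation weights (softmax over negative validation loss)."""
    losses = torch.tensor(client_val_losses, dtype=torch.float32)
    return torch.softmax(-losses / temperature, dim=0)

def aggregate(client_states, weights):
    """Weighted average of client parameters (float parameters only,
    for brevity) using the adaptive weights."""
    return {key: sum(w.item() * sd[key] for sd, w in zip(client_states, weights))
            for key in client_states[0]}

# e.g., a client with validation loss 0.4 outweighs one at 0.9:
print(adaptive_weights([0.4, 0.9, 0.6]))  # ~ tensor([0.4123, 0.2501, 0.3376])
```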
5. arXiv:2410.16296 (https://arxiv.org/abs/2410.16296) [eess.IV, cs.CV]
   Title: CirrMRI600+: Large Scale MRI Collection and Segmentation of Cirrhotic Liver
   Authors: Debesh Jha, Onkar Kishor Susladkar, Vandan Gorade, Elif Keles, Matthew Antalek, Deniz Seyithanoglu, Timurhan Cebeci, Halil Ertugrul Aktas, Gulbiz Dagoglu Kartal, Sabahattin Kaymakoglu, Sukru Mehmet Erturk, Yuri Velichko, Daniela Ladner, Amir A. Borhani, Alpay Medetalibeyoglu, Gorkem Durak, Ulas Bagci
   Abstract: Liver cirrhosis, the end stage of chronic liver disease, is characterized by extensive bridging fibrosis and nodular regeneration, leading to an increased risk of liver failure, complications of portal hypertension, malignancy and death. Early diagnosis and management of end-stage cirrhosis are significant clinical challenges. Magnetic resonance imaging (MRI) is a widely available, non-invasive imaging technique for cirrhosis assessment. However, the stage of liver fibrosis cannot be easily differentiated. Moreover, the fibrotic liver tissue (cirrhotic liver) causes significant change in liver enhancement, morphology and signal characteristics, which poses substantial challenges for the development of computer-aided diagnostic applications. Deep learning (DL) offers a promising solution for automatically segmenting and recognizing cirrhotic livers in MRI scans, potentially enabling fibrosis stage classification. However, the lack of datasets specifically focused on cirrhotic livers has hindered progress. CirrMRI600+ addresses this critical gap. This extensive dataset, the first of its kind, comprises 628 high-resolution abdominal MRI scans (310 T1-weighted and 318 T2-weighted, totaling nearly 40,000 slices) with annotated segmentation labels for cirrhotic livers. Unlike previous datasets, CirrMRI600+ specifically focuses on cirrhotic livers, capturing the complexities of this disease state. The link to the dataset is made publicly available at: https://osf.io/cuk24/. We also share 11 baseline deep learning segmentation methods used in our rigorous benchmarking experiments: https://github.com/NUBagciLab/CirrMRI600Plus.
   Submitted 6 October, 2024; originally announced October 2024.
6. arXiv:2410.12214 (https://arxiv.org/abs/2410.12214) [cs.CV, cs.AI]
   Title: Order-aware Interactive Segmentation
   Authors: Bin Wang, Anwesa Choudhuri, Meng Zheng, Zhongpai Gao, Benjamin Planche, Andong Deng, Qin Liu, Terrence Chen, Ulas Bagci, Ziyan Wu
   Abstract: Interactive segmentation aims to accurately segment target objects with minimal user interactions. However, current methods often fail to accurately separate target objects from the background, due to a limited understanding of order, the relative depth between objects in a scene. To address this issue, we propose OIS: order-aware interactive segmentation, where we explicitly encode the relative depth between objects into order maps. We introduce a novel order-aware attention, where the order maps seamlessly guide the user interactions (in the form of clicks) to attend to the image features. We further present an object-aware attention module to incorporate a strong object-level understanding to better differentiate objects with similar order. Our approach allows both dense and sparse integration of user clicks, enhancing both accuracy and efficiency as compared to prior works. Experimental results demonstrate that OIS achieves state-of-the-art performance, improving mIoU after one click by 7.61 on the HQSeg44K dataset and 1.32 on the DAVIS dataset as compared to the previous state-of-the-art SegNext, while also doubling inference speed compared to current leading methods. The project page is https://ukaukaaaa.github.io/projects/OIS/index.html
   Submitted 17 October, 2024; v1 submitted 16 October, 2024; originally announced October 2024.
   Comments: An interactive demo can be found on the project page: https://ukaukaaaa.github.io/projects/OIS/index.html
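The "order maps guide the clicks" mechanism can be pictured as an additive bias on attention logits. The toy sketch below uses invented shapes and an invented bias form to show the general idea only; it is not the OIS implementation:

```python
import torch

def order_biased_attention(q, k, v, order_map, scale=None):
    """Toy order-aware attention: the order map (relative depth of each
    location w.r.t. the clicked object, flattened to match k) is added
    as a bias so clicks attend to depth-compatible features."""
    scale = scale or q.shape[-1] ** -0.5
    logits = (q @ k.transpose(-2, -1)) * scale          # (clicks, locations)
    logits = logits + order_map.flatten().unsqueeze(0)  # bias per location
    return torch.softmax(logits, dim=-1) @ v

# two click queries attend over an 8x8 feature map whose order map marks
# pixels nearer (+1), level (0), or farther (-1) than the clicked object
q = torch.randn(2, 32)                              # 2 clicks, 32-d queries
k = v = torch.randn(64, 32)                         # 8*8 flattened locations
order_map = torch.randint(-1, 2, (8, 8)).float()
out = order_biased_attention(q, k, v, order_map)    # -> (2, 32)
```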
7. arXiv:2410.02044 (https://arxiv.org/abs/2410.02044) [eess.IV]
   Title: Frequency-Based Federated Domain Generalization for Polyp Segmentation
   Authors: Hongyi Pan, Debesh Jha, Koushik Biswas, Ulas Bagci
   Abstract: Federated Learning (FL) offers a powerful strategy for training machine learning models across decentralized datasets while maintaining data privacy, yet domain shifts among clients can degrade performance, particularly in medical imaging tasks like polyp segmentation. This paper introduces a novel Frequency-Based Domain Generalization (FDG) framework, utilizing soft- and hard-thresholding in the Fourier domain to address these challenges. By applying soft- and hard-thresholding to Fourier coefficients, our method generates new images with reduced background noise and enhances the model's ability to generalize across diverse medical imaging domains. Extensive experiments demonstrate substantial improvements in segmentation accuracy and domain robustness over baseline methods. This innovation integrates frequency domain techniques into FL, presenting a resilient approach to overcoming domain variability in decentralized medical image analysis.
   Submitted 2 October, 2024; originally announced October 2024.
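Soft- and hard-thresholding of Fourier coefficients is concrete enough to sketch directly. In this illustration the threshold tau is taken relative to the peak magnitude; the actual FDG settings are not given in the abstract:

```python
import numpy as np

def fourier_threshold(img, tau=0.05, mode="soft"):
    """Generate a new training view by thresholding the image's Fourier
    magnitudes: hard-thresholding zeroes small coefficients, while
    soft-thresholding shrinks every coefficient toward zero."""
    coeffs = np.fft.fft2(img)
    mag, phase = np.abs(coeffs), np.angle(coeffs)
    t = tau * mag.max()                      # threshold relative to peak
    if mode == "hard":
        mag = np.where(mag >= t, mag, 0.0)
    else:
        mag = np.maximum(mag - t, 0.0)
    return np.real(np.fft.ifft2(mag * np.exp(1j * phase)))

# augment a frame both ways (random image stands in for a colonoscopy frame)
frame = np.random.rand(256, 256)
aug_soft = fourier_threshold(frame)
aug_hard = fourier_threshold(frame, mode="hard")
```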
8. arXiv:2409.05875 (https://arxiv.org/abs/2409.05875) [cs.CV]
   Title: Transformer-Enhanced Iterative Feedback Mechanism for Polyp Segmentation
   Authors: Nikhil Kumar Tomar, Debesh Jha, Koushik Biswas, Tyler M. Berzin, Rajesh Keswani, Michael Wallace, Ulas Bagci
   Abstract: Colorectal cancer (CRC) is the third most common cause of cancer diagnosed in the United States and the second leading cause of cancer-related death among both genders. Notably, CRC is the leading cause of cancer in younger men less than 50 years old. Colonoscopy is considered the gold standard for the early diagnosis of CRC. Skills vary significantly among endoscopists, and a high miss rate is reported. Automated polyp segmentation can reduce the miss rate, enabling timely treatment at an early stage. To address this challenge, we introduce FANetv2, an advanced encoder-decoder network designed to accurately segment polyps from colonoscopy images. Leveraging an initial input mask generated by Otsu thresholding, FANetv2 iteratively refines its binary segmentation masks through a novel feedback attention mechanism informed by the mask predictions of previous epochs. Additionally, it employs a text-guided approach that integrates essential information about the number (one or many) and size (small, medium, large) of polyps to further enhance its feature representation capabilities. This dual-task approach facilitates accurate polyp segmentation and aids in the auxiliary classification of polyp attributes, significantly boosting the model's performance. Our comprehensive evaluations on the publicly available BKAI-IGH and CVC-ClinicDB datasets demonstrate the superior performance of FANetv2, evidenced by high Dice similarity coefficients (DSC) of 0.9186 and 0.9481, along with low Hausdorff distances of 2.83 and 3.19, respectively. The source code for FANetv2 is available at https://github.com/xxxxx/FANetv2.
   Submitted 24 August, 2024; originally announced September 2024.
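The Otsu-thresholded input mask that seeds FANetv2's feedback loop is a classical computation; a compact NumPy version follows. The per-epoch feedback is expressed only as comments, since the network itself is not shown here (fanetv2 is a hypothetical callable):

```python
import numpy as np

def otsu_threshold(gray):
    """Pick the threshold that maximizes between-class variance."""
    hist, edges = np.histogram(gray, bins=256)
    p = hist.astype(float) / hist.sum()
    centers = (edges[:-1] + edges[1:]) / 2
    w0 = np.cumsum(p)                    # cumulative class weight
    mu = np.cumsum(p * centers)          # cumulative class mean
    with np.errstate(divide="ignore", invalid="ignore"):
        between = (mu[-1] * w0 - mu) ** 2 / (w0 * (1.0 - w0))
    return centers[np.nanargmax(between)]

# feedback loop (sketch):
#   mask = frame > otsu_threshold(frame)   # epoch-0 attention prior
#   for each epoch:
#       pred = fanetv2(frame, mask)        # prior mask steers attention
#       mask = pred > 0.5                  # refined mask for next epoch
```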
9. arXiv:2409.00045 (https://arxiv.org/abs/2409.00045) [cs.CV]
   Title: PolypDB: A Curated Multi-Center Dataset for Development of AI Algorithms in Colonoscopy
   Authors: Debesh Jha, Nikhil Kumar Tomar, Vanshali Sharma, Quoc-Huy Trinh, Koushik Biswas, Hongyi Pan, Ritika K. Jha, Gorkem Durak, Alexander Hann, Jonas Varkey, Hang Viet Dao, Long Van Dao, Binh Phuc Nguyen, Khanh Cong Pham, Quang Trung Tran, Nikolaos Papachrysos, Brandon Rieders, Peter Thelin Schmidt, Enrik Geissler, Tyler Berzin, Pål Halvorsen, Michael A. Riegler, Thomas de Lange, Ulas Bagci
   Abstract: Colonoscopy is the primary method for examination, detection, and removal of polyps. Regular screening helps detect and prevent colorectal cancer at an early curable stage. However, challenges such as variation in endoscopists' skills, bowel preparation quality, and the complex nature of the large intestine lead to a high polyp miss rate. These missed polyps can develop into cancer later on, which underscores the importance of improving detection methods. A computer-aided diagnosis system can support physicians by assisting in detecting overlooked polyps. However, one of the important challenges for developing novel deep learning models for automatic polyp detection and segmentation is the lack of publicly available, multi-center, large and diverse datasets. To address this gap, we introduce PolypDB, a large-scale publicly available dataset that contains 3934 still polyp images and their corresponding ground truth from real colonoscopy videos, intended to support the design of efficient polyp detection and segmentation architectures. The dataset has been developed and verified by a team of 10 gastroenterologists. PolypDB comprises images from five modalities, Blue Light Imaging (BLI), Flexible Imaging Color Enhancement (FICE), Linked Color Imaging (LCI), Narrow Band Imaging (NBI), and White Light Imaging (WLI), collected at three medical centers in Norway, Sweden, and Vietnam. We therefore split the dataset by modality and by medical center for modality-wise and center-wise analysis. We provide a benchmark on each modality using eight popular segmentation methods and six standard polyp detection methods. Furthermore, we also provide a center-wise benchmark under federated learning settings. Our dataset is public and can be downloaded at https://osf.io/pr7ms/.
   Submitted 19 August, 2024; originally announced September 2024.
10. arXiv:2408.10733 (https://arxiv.org/abs/2408.10733) [eess.IV, cs.CV]
   Title: Classification of Endoscopy and Video Capsule Images using CNN-Transformer Model
   Authors: Aliza Subedi, Smriti Regmi, Nisha Regmi, Bhumi Bhusal, Ulas Bagci, Debesh Jha
   Abstract: Gastrointestinal cancer is a leading cause of cancer-related incidence and death, making it crucial to develop novel computer-aided diagnosis systems for early detection and enhanced treatment. Traditional approaches rely on the expertise of gastroenterologists to identify diseases; however, this process is subjective, and interpretation can vary even among expert clinicians. Considering recent advancements in classifying gastrointestinal anomalies and landmarks in endoscopic and video capsule endoscopy images, this study proposes a hybrid model that combines the advantages of Transformers and Convolutional Neural Networks (CNNs) to enhance classification performance. Our model utilizes DenseNet201 as a CNN branch to extract local features and integrates a Swin Transformer branch for global feature understanding, combining both to perform the classification task. For the GastroVision dataset, our proposed model demonstrates excellent performance with Precision, Recall, F1 score, Accuracy, and Matthews Correlation Coefficient (MCC) of 0.8320, 0.8386, 0.8324, 0.8386, and 0.8191, respectively, showcasing its robustness against class imbalance and surpassing other CNNs as well as the Swin Transformer model. Similarly, for the Kvasir-Capsule, a large video capsule endoscopy dataset, our model outperforms all others, achieving overall Precision, Recall, F1 score, Accuracy, and MCC of 0.7007, 0.7239, 0.6900, 0.7239, and 0.3871. Moreover, we generated saliency maps to explain our model's focus areas, demonstrating its reliable decision-making process. The results underscore the potential of our hybrid CNN-Transformer model in aiding the early and accurate detection of gastrointestinal (GI) anomalies.
   Submitted 20 August, 2024; originally announced August 2024.
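The two-branch design above reads naturally as code. The sketch pairs torchvision's DenseNet201 with a timm Swin backbone and fuses pooled features by concatenation; the particular Swin variant and fusion head are assumptions, as the abstract does not pin them down:

```python
import torch
import torch.nn as nn
import timm
from torchvision.models import densenet201

class HybridGIClassifier(nn.Module):
    """Local features from a CNN branch plus global context from a Swin
    branch, concatenated for classification (illustrative fusion)."""
    def __init__(self, num_classes):
        super().__init__()
        self.cnn = densenet201(weights=None)
        self.cnn.classifier = nn.Identity()       # pooled 1920-d features
        self.swin = timm.create_model("swin_tiny_patch4_window7_224",
                                      pretrained=False, num_classes=0)
        self.head = nn.Linear(1920 + self.swin.num_features, num_classes)

    def forward(self, x):                         # x: (B, 3, 224, 224)
        return self.head(torch.cat([self.cnn(x), self.swin(x)], dim=1))

model = HybridGIClassifier(num_classes=27)        # e.g., GastroVision's 27 classes
logits = model(torch.randn(1, 3, 224, 224))
```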
arXiv:2408.05692 (https://arxiv.org/abs/2408.05692; pdf: https://arxiv.org/pdf/2408.05692)
Subjects: cs.CV; cs.AI; cs.LG; eess.IV
Title: A Novel Momentum-Based Deep Learning Technique for Medical Image Classification and Segmentation
Authors: Koushik Biswas, Ridal Pal, Shaswat Patel, Debesh Jha, Meghana Karri, Amit Reza, Gorkem Durak, Alpay Medetalibeyoglu, Matthew Antalek, Yury Velichko, Daniela Ladner, Amir Borhani, Ulas Bagci
Abstract: Accurately segmenting different organs from medical images is a critical prerequisite for computer-assisted diagnosis and intervention planning. This study proposes a deep learning-based approach for segmenting various organs from CT and MRI scans and classifying diseases. Our study introduces a novel technique integrating momentum within residual blocks for enhanced training dynamics in medical image analysis. We applied our method to two distinct tasks: segmenting liver, lung, and colon data, and classifying abdominal pelvic CT and MRI scans. The proposed approach has shown promising results, outperforming state-of-the-art methods on publicly available benchmarking datasets. For instance, on the lung segmentation dataset, our approach yielded significant enhancements over the TransNetR model, including a 5.72% increase in Dice score, a 5.04% improvement in mean Intersection over Union (mIoU), an 8.02% improvement in recall, and a 4.42% improvement in precision. Hence, incorporating momentum led to state-of-the-art performance in both segmentation and classification tasks, representing a significant advancement in the field of medical imaging.
Submitted 11 August, 2024; originally announced August 2024.
Comments: 8 pages
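The abstract does not spell out the exact formulation, but "momentum within residual blocks" can plausibly be read in the spirit of momentum residual networks, where the residual stream carries a velocity term threaded through the depth of the network. The sketch below is one such reading, not the authors' method; the momentum coefficient gamma and the two-conv block body are assumed hyperparameters.

```python
import torch
import torch.nn as nn


class MomentumResidualBlock(nn.Module):
    """One plausible momentum-augmented residual update:
        v_{l+1} = gamma * v_l + f(x_l)
        x_{l+1} = x_l + v_{l+1}
    """

    def __init__(self, channels: int, gamma: float = 0.9):
        super().__init__()
        self.gamma = gamma
        self.f = nn.Sequential(
            nn.Conv2d(channels, channels, 3, padding=1),
            nn.BatchNorm2d(channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(channels, channels, 3, padding=1),
            nn.BatchNorm2d(channels),
        )

    def forward(self, x, v):
        v = self.gamma * v + self.f(x)  # velocity accumulates residuals over depth
        return x + v, v


# The velocity is passed from block to block through the network's depth.
blocks = nn.ModuleList([MomentumResidualBlock(64) for _ in range(4)])
x = torch.randn(1, 64, 32, 32)
v = torch.zeros_like(x)
for blk in blocks:
    x, v = blk(x, v)
print(x.shape)  # torch.Size([1, 64, 32, 32])
```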
arXiv:2408.04491 (https://arxiv.org/abs/2408.04491; pdf: https://arxiv.org/pdf/2408.04491)
Subjects: cs.CV; cs.AI
Title: Towards Synergistic Deep Learning Models for Volumetric Cirrhotic Liver Segmentation in MRIs
Authors: Vandan Gorade, Onkar Susladkar, Gorkem Durak, Elif Keles, Ertugrul Aktas, Timurhan Cebeci, Alpay Medetalibeyoglu, Daniela Ladner, Debesh Jha, Ulas Bagci
Abstract: Liver cirrhosis, a leading cause of global mortality, requires precise segmentation of ROIs for effective disease monitoring and treatment planning. Existing segmentation models often fail to capture complex feature interactions and generalize across diverse datasets. To address these limitations, we propose a novel synergistic theory that leverages complementary latent spaces for enhanced feature interaction modeling. Our proposed architecture, nnSynergyNet3D, integrates continuous and discrete latent spaces for 3D volumes and features auto-configured training. This approach captures both fine-grained and coarse features, enabling effective modeling of intricate feature interactions. We empirically validated nnSynergyNet3D on a private dataset of 628 high-resolution T1 abdominal MRI scans from 339 patients. Our model outperformed the baseline nnUNet3D by approximately 2%. Additionally, zero-shot testing on healthy liver CT scans from the public LiTS dataset demonstrated superior cross-modal generalization capabilities. These results highlight the potential of synergistic latent space models to improve segmentation accuracy and robustness, thereby enhancing clinical workflows by ensuring consistency across CT and MRI modalities.
Submitted 8 August, 2024; originally announced August 2024.
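As a rough illustration of "complementary continuous and discrete latent spaces", the toy sketch below pairs a continuous convolutional bottleneck with a vector-quantized codebook branch (VQ-VAE style, with a straight-through gradient) and fuses the two. It is a 2D stand-in written for brevity; nnSynergyNet3D is 3D and auto-configured, and nothing here is claimed about the authors' actual layer choices.

```python
import torch
import torch.nn as nn


class SynergisticLatent(nn.Module):
    """Toy fusion of a continuous latent with a discrete (vector-quantized) one."""

    def __init__(self, channels: int = 64, codebook_size: int = 512):
        super().__init__()
        self.continuous = nn.Conv2d(channels, channels, 1)     # continuous branch
        self.codebook = nn.Embedding(codebook_size, channels)  # discrete codes
        self.fuse = nn.Conv2d(2 * channels, channels, 1)

    def forward(self, z):                                      # z: (B, C, H, W)
        zc = self.continuous(z)
        # Nearest-codebook lookup with a straight-through estimator.
        flat = z.permute(0, 2, 3, 1).reshape(-1, z.shape[1])   # (B*H*W, C)
        dist = torch.cdist(flat, self.codebook.weight)         # (B*H*W, K)
        codes = self.codebook(dist.argmin(dim=1))              # (B*H*W, C)
        zq = codes.view(z.shape[0], z.shape[2], z.shape[3], -1).permute(0, 3, 1, 2)
        zq = z + (zq - z).detach()                             # straight-through grad
        return self.fuse(torch.cat([zc, zq], dim=1))


out = SynergisticLatent()(torch.randn(2, 64, 16, 16))
print(out.shape)  # torch.Size([2, 64, 16, 16])
```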
arXiv:2407.19284 (https://arxiv.org/abs/2407.19284; pdf: https://arxiv.org/pdf/2407.19284)
Subjects: eess.IV; cs.CV
Title: Optimizing Synthetic Data for Enhanced Pancreatic Tumor Segmentation
Authors: Linkai Peng, Zheyuan Zhang, Gorkem Durak, Frank H. Miller, Alpay Medetalibeyoglu, Michael B. Wallace, Ulas Bagci
Abstract: Pancreatic cancer remains one of the leading causes of cancer-related mortality worldwide. Precise segmentation of pancreatic tumors from medical images is a bottleneck for effective clinical decision-making. However, achieving high accuracy is often limited by the small size and availability of real patient data for training deep learning models. Recent approaches have employed synthetic data generation to augment training datasets. While promising, these methods may not yet meet the performance benchmarks required for real-world clinical use. This study critically evaluates the limitations of existing generative-AI-based frameworks for pancreatic tumor segmentation. We conduct a series of experiments to investigate the impact of synthetic tumor size and boundary definition precision on model performance. Our findings demonstrate that: (1) strategically selecting a combination of synthetic tumor sizes is crucial for optimal segmentation outcomes, and (2) generating synthetic tumors with precise boundaries significantly improves model accuracy. These insights highlight the importance of refined synthetic data augmentation for enhancing the clinical utility of segmentation models in pancreatic cancer decision making, including diagnosis, prognosis, and treatment plans. Our code will be available at https://github.com/lkpengcs/SynTumorAnalyzer.
Submitted 1 October, 2024; v1 submitted 27 July, 2024; originally announced July 2024.
Comments: MICCAI Workshop AIPAD 2024

arXiv:2406.14819 (https://arxiv.org/abs/2406.14819; pdf: https://arxiv.org/pdf/2406.14819)
Subjects: cs.CV
Title: SAM-EG: Segment Anything Model with Edge Guidance Framework for Efficient Polyp Segmentation
Authors: Quoc-Huy Trinh, Hai-Dang Nguyen, Bao-Tram Nguyen Ngoc, Debesh Jha, Ulas Bagci, Minh-Triet Tran
Abstract: Polyp segmentation, a critical concern in medical imaging, has prompted numerous proposed methods aimed at enhancing the quality of segmented masks. While current state-of-the-art techniques produce impressive results, the size and computational cost of these models pose challenges for practical industry applications. Recently, the Segment Anything Model (SAM) has been proposed as a robust foundation model, showing promise for adaptation to medical image segmentation. Inspired by this concept, we propose SAM-EG, a framework that guides small segmentation models for polyp segmentation to address the computational cost challenge. Additionally, we introduce the Edge Guiding module, which integrates edge information into image features to help the segmentation model resolve the boundary issues that current segmentation models exhibit on this task. Through extensive experiments, our small models demonstrate their efficacy, achieving results competitive with state-of-the-art methods and offering a promising approach to developing compact models with high accuracy for polyp segmentation and the broader field of medical imaging.
Submitted 20 June, 2024; originally announced June 2024.
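The Edge Guiding module is described only at a high level in the abstract. A common way to inject edge information, sketched below, is to compute Sobel edge maps from the input image, embed them, and fuse them with the backbone's features; the module name, layer sizes, and fusion-by-concatenation here are illustrative assumptions, not the paper's definition.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F


class EdgeGuide(nn.Module):
    """Illustrative edge-guiding block: Sobel edges -> embedding -> feature fusion."""

    def __init__(self, feat_channels: int):
        super().__init__()
        kx = torch.tensor([[-1., 0., 1.], [-2., 0., 2.], [-1., 0., 1.]])
        # Two fixed Sobel kernels (horizontal and vertical gradients).
        self.register_buffer("sobel", torch.stack([kx, kx.t()]).unsqueeze(1))
        self.embed = nn.Conv2d(2, feat_channels, 3, padding=1)
        self.fuse = nn.Conv2d(2 * feat_channels, feat_channels, 1)

    def forward(self, image, feats):
        gray = image.mean(dim=1, keepdim=True)             # (B, 1, H, W)
        edges = F.conv2d(gray, self.sobel, padding=1)      # (B, 2, H, W)
        edges = F.interpolate(edges, size=feats.shape[-2:],
                              mode="bilinear", align_corners=False)
        return self.fuse(torch.cat([feats, self.embed(edges)], dim=1))


guide = EdgeGuide(feat_channels=64)
out = guide(torch.randn(1, 3, 256, 256), torch.randn(1, 64, 64, 64))
print(out.shape)  # torch.Size([1, 64, 64, 64])
```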
arXiv:2406.11868 (https://arxiv.org/abs/2406.11868; pdf: https://arxiv.org/pdf/2406.11868)
Subjects: cs.CY; cs.AI
Title: Ethical Framework for Responsible Foundational Models in Medical Imaging
Authors: Abhijit Das, Debesh Jha, Jasmer Sanjotra, Onkar Susladkar, Suramyaa Sarkar, Ashish Rauniyar, Nikhil Tomar, Vanshali Sharma, Ulas Bagci
Abstract: Foundational models (FMs) have tremendous potential to revolutionize medical imaging. However, their deployment in real-world clinical settings demands extensive ethical considerations. This paper highlights the ethical concerns related to FMs and proposes a framework to guide their responsible development and implementation within medicine. We examine ethical issues such as privacy of patient data, bias mitigation, algorithmic transparency, explainability, and accountability. The proposed framework is designed to prioritize patient welfare, mitigate potential risks, and foster trust in AI-assisted healthcare.
Submitted 13 April, 2024; originally announced June 2024.

arXiv:2406.03430 (https://arxiv.org/abs/2406.03430; pdf: https://arxiv.org/pdf/2406.03430)
Subjects: eess.IV; cs.CV
Title: Computation-Efficient Era: A Comprehensive Survey of State Space Models in Medical Image Analysis
Authors: Moein Heidari, Sina Ghorbani Kolahi, Sanaz Karimijafarbigloo, Bobby Azad, Afshin Bozorgpour, Soheila Hatami, Reza Azad, Ali Diba, Ulas Bagci, Dorit Merhof, Ilker Hacihaliloglu
Abstract: Sequence modeling plays a vital role across various domains, with recurrent neural networks historically the predominant method for these tasks. However, the emergence of transformers has altered this paradigm due to their superior performance. Building on these advances, transformers have joined CNNs as the two leading foundational models for learning visual representations.
However, transformers are hindered by the $\mathcal{O}(N^2)$ complexity of their attention mechanisms, while CNNs lack global receptive fields and dynamic weight allocation. State Space Models (SSMs), specifically the Mamba model with its selection mechanism and hardware-aware architecture, have recently garnered immense interest in sequential modeling and visual representation learning, challenging the dominance of transformers by providing very long context lengths and substantial efficiency while maintaining complexity linear in the input sequence length. Capitalizing on these advances in computer vision, medical imaging has heralded a new epoch with Mamba models. To help researchers navigate the surge, this survey offers an encyclopedic review of Mamba models in medical imaging. Specifically, we start with a comprehensive theoretical review of the foundations of SSMs, including the Mamba architecture and its alternatives for sequence modeling paradigms in this context. Next, we offer a structured classification of Mamba models in the medical field and introduce a diverse categorization scheme based on application, imaging modality, and targeted organ. Finally, we summarize key challenges, discuss future research directions for SSMs in the medical domain, and propose several directions to fulfill the demands of this field. In addition, we have compiled the studies discussed in this paper, along with their open-source implementations, in our GitHub repository.
Submitted 5 June, 2024; originally announced June 2024.
Comments: This is the first version of our survey; the paper is currently under review.
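The linear-complexity claim is easy to see from the SSM recurrence itself: each token updates a fixed-size hidden state, so the scan below costs O(N) in sequence length, versus the O(N^2) pairwise score matrix of self-attention. This is a generic (non-selective) diagonal SSM sketch for illustration, not Mamba's selective, hardware-aware implementation.

```python
import torch
import torch.nn as nn


class DiagonalSSM(nn.Module):
    """Minimal diagonal state-space layer:
        h_t = A * h_{t-1} + B * x_t,   y_t = C * h_t
    The loop touches each token once, so the cost is O(N) in sequence length;
    self-attention instead materializes an N x N score matrix, O(N^2).
    """

    def __init__(self, dim: int, state: int = 16):
        super().__init__()
        self.A = nn.Parameter(-torch.rand(dim, state))       # pre-activation decays
        self.B = nn.Parameter(torch.randn(dim, state) * 0.1)
        self.C = nn.Parameter(torch.randn(dim, state) * 0.1)

    def forward(self, x):                                    # x: (batch, seq, dim)
        A = torch.exp(self.A)                                # decay factors in (0, 1]
        h = torch.zeros(x.shape[0], x.shape[2], self.A.shape[1], device=x.device)
        ys = []
        for t in range(x.shape[1]):                          # O(N) scan
            h = A * h + self.B * x[:, t].unsqueeze(-1)
            ys.append((h * self.C).sum(-1))
        return torch.stack(ys, dim=1)


y = DiagonalSSM(dim=32)(torch.randn(2, 128, 32))
print(y.shape)  # torch.Size([2, 128, 32])
```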
arXiv:2405.18383 (https://arxiv.org/abs/2405.18383; pdf: https://arxiv.org/pdf/2405.18383)
Subjects: cs.CV; cs.AI; cs.HC; cs.LG
Title: Brain Tumor Segmentation (BraTS) Challenge 2024: Meningioma Radiotherapy Planning Automated Segmentation
Authors: Dominic LaBella, Katherine Schumacher, Michael Mix, Kevin Leu, Shan McBurney-Lin, Pierre Nedelec, Javier Villanueva-Meyer, Jonathan Shapey, Tom Vercauteren, Kazumi Chia, Omar Al-Salihi, Justin Leu, Lia Halasz, Yury Velichko, Chunhao Wang, John Kirkpatrick, Scott Floyd, Zachary J. Reitman, Trey Mullikin, Ulas Bagci, Sean Sachdev, Jona A. Hattangadi-Gluth, Tyler Seibert, Nikdokht Farid, Connor Puett, et al.
(45 additional authors not shown)
Abstract: The 2024 Brain Tumor Segmentation Meningioma Radiotherapy (BraTS-MEN-RT) challenge aims to advance automated segmentation algorithms using the largest known multi-institutional dataset of radiotherapy planning brain MRIs with expert-annotated target labels, covering patients with intact or postoperative meningioma treated with either conventional external beam radiotherapy or stereotactic radiosurgery. Each case includes a defaced 3D post-contrast T1-weighted radiotherapy planning MRI in its native acquisition space, accompanied by a single-label "target volume" representing the gross tumor volume (GTV) and any at-risk postoperative site. Target volume annotations adhere to established radiotherapy planning protocols, ensuring consistency across cases and institutions. For preoperative meningiomas, the target volume encompasses the entire GTV and associated nodular dural tail, while for postoperative cases it includes at-risk resection cavity margins as determined by the treating institution. Case annotations were reviewed and approved by expert neuroradiologists and radiation oncologists. Participating teams will develop, containerize, and evaluate automated segmentation models using this comprehensive dataset. Model performance will be assessed using an adapted lesion-wise Dice Similarity Coefficient and the 95% Hausdorff distance. The top-performing teams will be recognized at the Medical Image Computing and Computer Assisted Intervention Conference in October 2024. BraTS-MEN-RT is expected to significantly advance automated radiotherapy planning by enabling precise tumor segmentation and facilitating tailored treatment, ultimately improving patient outcomes.
Submitted 15 August, 2024; v1 submitted 28 May, 2024; originally announced May 2024.
Comments: 14 pages, 9 figures, 1 table
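For reference, the headline metric is a lesion-wise adaptation of the Dice Similarity Coefficient. The sketch below computes plain Dice per ground-truth connected component using scipy; it is deliberately naive, and the challenge's official adaptation (and its HD95 companion) involves matching rules not reproduced here.

```python
import numpy as np
from scipy import ndimage


def dice(a: np.ndarray, b: np.ndarray) -> float:
    """Plain Dice Similarity Coefficient between two binary masks."""
    inter = np.logical_and(a, b).sum()
    denom = a.sum() + b.sum()
    return 2.0 * inter / denom if denom else 1.0


def lesionwise_dice(pred: np.ndarray, gt: np.ndarray) -> list:
    """Naive lesion-wise Dice: score each GT connected component against the
    prediction restricted to that component's voxels. Illustrative only; the
    BraTS-MEN-RT adaptation uses its own matching protocol."""
    labels, n = ndimage.label(gt)
    return [dice(pred[labels == i], gt[labels == i]) for i in range(1, n + 1)]


gt = np.zeros((64, 64), bool)
gt[10:20, 10:20] = True          # lesion 1
gt[40:50, 40:50] = True          # lesion 2
pred = np.zeros_like(gt)
pred[12:20, 10:20] = True        # partially hits lesion 1, misses lesion 2
print([round(d, 3) for d in lesionwise_dice(pred, gt)])  # [0.889, 0.0]
```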
arXiv:2405.16740 (https://arxiv.org/abs/2405.16740; pdf: https://arxiv.org/pdf/2405.16740)
Subjects: cs.CV
Title: PP-SAM: Perturbed Prompts for Robust Adaptation of Segment Anything Model for Polyp Segmentation
Authors: Md Mostafijur Rahman, Mustafa Munir, Debesh Jha, Ulas Bagci, Radu Marculescu
Abstract: The Segment Anything Model (SAM), originally designed for general-purpose segmentation tasks, has recently been used for polyp segmentation. Nonetheless, fine-tuning SAM with data from new imaging centers or clinics poses significant challenges, because it requires an expensive, time-intensive annotated dataset and must cope with variability in user prompts during inference. To address these issues, we propose a robust fine-tuning technique, PP-SAM, that allows SAM to adapt to the polyp segmentation task with limited images. To this end, we utilize variable perturbed bounding box prompts (BBP) to enrich the learning context and enhance the model's robustness to BBP perturbations during inference. Rigorous experiments on polyp segmentation benchmarks reveal that our variable BBP perturbation significantly improves model resilience. Notably, on Kvasir, 1-shot fine-tuning boosts the DICE score by 20% and 37% with 50- and 100-pixel BBP perturbations during inference, respectively. Moreover, our experiments show that 1-shot, 5-shot, and 10-shot PP-SAM with 50-pixel perturbations during inference outperform a recent state-of-the-art (SOTA) polyp segmentation method by 26%, 7%, and 5% DICE scores, respectively.
Our results motivate the broader applicability of PP-SAM to other medical imaging tasks with limited samples. Our implementation is available at https://github.com/SLDGroup/PP-SAM.
Submitted 26 May, 2024; originally announced May 2024.
Comments: 7 pages, 9 figures, Proceedings of the 2024 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops
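The core training trick, variable perturbation of bounding-box prompts, is simple to reproduce: jitter each ground-truth box by up to k pixels before passing it to SAM, with k playing the role of the paper's 50- or 100-pixel knob. A minimal sketch (function name and box convention are ours, not the repository's):

```python
import numpy as np


def perturb_bbox(box, max_shift: int, img_hw, rng=None):
    """Jitter a (x0, y0, x1, y1) box by up to max_shift pixels per coordinate,
    clipped to the image, mimicking variable bounding-box prompt perturbation."""
    rng = rng or np.random.default_rng()
    h, w = img_hw
    x0, y0, x1, y1 = (c + rng.integers(-max_shift, max_shift + 1) for c in box)
    x0, x1 = sorted((int(np.clip(x0, 0, w - 1)), int(np.clip(x1, 0, w - 1))))
    y0, y1 = sorted((int(np.clip(y0, 0, h - 1)), int(np.clip(y1, 0, h - 1))))
    return x0, y0, x1, y1


# Example: a 50-pixel perturbation of a ground-truth polyp box in a 512x512 frame.
print(perturb_bbox((100, 120, 220, 260), max_shift=50, img_hw=(512, 512)))
```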
arXiv:2405.13901 (https://arxiv.org/abs/2405.13901; pdf: https://arxiv.org/pdf/2405.13901)
Subjects: cs.CV; cs.LG; eess.SP
Title: DCT-Based Decorrelated Attention for Vision Transformers
Authors: Hongyi Pan, Emadeldeen Hamdan, Xin Zhu, Koushik Biswas, Ahmet Enis Cetin, Ulas Bagci
Abstract: Central to the effectiveness of Transformer architectures is the self-attention mechanism, a function that maps queries, keys, and values into a high-dimensional vector space. However, training the attention weights of queries, keys, and values is non-trivial from a state of random initialization. In this paper, we propose two methods. (i) We first address the initialization problem of Vision Transformers by introducing a simple yet effective initialization approach utilizing Discrete Cosine Transform (DCT) coefficients. Our DCT-based attention initialization marks a significant gain compared to traditional initialization strategies, offering a robust foundation for the attention mechanism, and our experiments reveal that it enhances the accuracy of Vision Transformers on classification tasks. (ii) We also observe that since the DCT effectively decorrelates image information in the frequency domain, this decorrelation is useful for compression: it allows the quantization step to discard many of the higher-frequency components. Based on this observation, we propose a novel DCT-based compression technique for the attention function of Vision Transformers. Since high-frequency DCT coefficients usually correspond to noise, we truncate the high-frequency DCT components of the input patches. Our DCT-based compression reduces the size of the weight matrices for queries, keys, and values. While maintaining the same level of accuracy, our DCT-compressed Swin Transformers obtain a considerable decrease in computational overhead.
Submitted 28 May, 2024; v1 submitted 22 May, 2024; originally announced May 2024.
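The initialization idea can be made concrete: build the orthonormal type-II DCT basis and copy it into a square projection (for example a Q, K, or V weight) instead of random values. The sketch below assumes a plain DCT-II matrix is what is meant; the paper's exact placement of the coefficients may differ.

```python
import math
import torch
import torch.nn as nn


def dct_matrix(n: int) -> torch.Tensor:
    """Orthonormal DCT-II basis: D[k, i] = s_k * cos(pi * (2i + 1) * k / (2n))."""
    i = torch.arange(n).float()
    k = i.unsqueeze(1)
    d = torch.cos(math.pi * (2 * i + 1) * k / (2 * n)) * math.sqrt(2.0 / n)
    d[0] /= math.sqrt(2.0)  # first row rescaled so that D @ D.T = I
    return d


def dct_init_(linear: nn.Linear) -> None:
    """Initialize a square projection with DCT coefficients instead of random
    weights: one reading of the paper's scheme, not its verified code."""
    with torch.no_grad():
        linear.weight.copy_(dct_matrix(linear.in_features))


proj = nn.Linear(64, 64, bias=False)
dct_init_(proj)
# The basis is orthonormal, so the initialized projection is an isometry.
print(torch.allclose(proj.weight @ proj.weight.T, torch.eye(64), atol=1e-4))  # True
```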
arXiv:2405.12367 (https://arxiv.org/abs/2405.12367; pdf: https://arxiv.org/pdf/2405.12367)
Subjects: eess.IV; cs.CV
Title: Large-Scale Multi-Center CT and MRI Segmentation of Pancreas with Deep Learning
Authors: Zheyuan Zhang, Elif Keles, Gorkem Durak, Yavuz Taktak, Onkar Susladkar, Vandan Gorade, Debesh Jha, Asli C. Ormeci, Alpay Medetalibeyoglu, Lanhong Yao, Bin Wang, Ilkin Sevgi Isler, Linkai Peng, Hongyi Pan, Camila Lopes Vendrami, Amir Bourhani, Yury Velichko, Boqing Gong, Concetto Spampinato, Ayis Pyrros, Pallavi Tiwari, Derk C. F. Klatte, Megan Engels, Sanne Hoogenboom, Candice W. Bolan, et al. (13 additional authors not shown)
Abstract: Automated volumetric segmentation of the pancreas on cross-sectional imaging is needed for the diagnosis and follow-up of pancreatic diseases. While CT-based pancreatic segmentation is more established, MRI-based segmentation methods are understudied, largely due to a lack of publicly available datasets, benchmarking research efforts, and domain-specific deep learning methods. In this retrospective study, we collected a large dataset (767 scans from 499 participants) of T1-weighted (T1W) and T2-weighted (T2W) abdominal MRI series from five centers between March 2004 and November 2022. We also collected CT scans of 1,350 patients from publicly available sources for benchmarking purposes. We developed a new pancreas segmentation method, PanSegNet, combining the strengths of nnUNet and a Transformer network with a new linear attention module enabling volumetric computation. We tested PanSegNet's accuracy in cross-modality (a total of 2,117 scans) and cross-center settings with Dice and Hausdorff distance (HD95) evaluation metrics. We used Cohen's kappa statistics for intra- and inter-rater agreement evaluation and paired t-tests for volume and Dice comparisons.
For segmentation accuracy, we achieved Dice coefficients of 88.3% (std: 7.2%, at case level) with CT, 85.0% (std: 7.9%) with T1W MRI, and 86.3% (std: 6.4%) with T2W MRI. There was a high correlation for pancreas volume prediction, with R^2 of 0.91, 0.84, and 0.85 for CT, T1W, and T2W, respectively. We found moderate inter-observer agreement (0.624 and 0.638 for T1W and T2W MRI, respectively) and high intra-observer agreement scores. All MRI data is made available at https://osf.io/kysnj/. Our source code is available at https://github.com/NUBagciLab/PaNSegNet.
Submitted 24 October, 2024; v1 submitted 20 May, 2024; originally announced May 2024.
Comments: Peer-reviewed version
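The abstract mentions "a new linear attention module enabling volumetric computation" without details. The generic kernel trick below, phi(Q) (phi(K)^T V) with phi = ELU + 1, shows how attention becomes linear in the number of tokens, which is what makes full 3D volumes tractable; this is the standard linear-attention formulation, not necessarily PanSegNet's exact module.

```python
import torch
import torch.nn.functional as F


def linear_attention(q, k, v, eps: float = 1e-6):
    """Kernelized attention, O(N * d^2) instead of O(N^2 * d):
    out = phi(Q) @ (phi(K)^T @ V), row-normalized, with phi = elu + 1.
    N can be the voxel count of a 3D volume, hence volumetric feasibility.
    """
    q, k = F.elu(q) + 1, F.elu(k) + 1                # positive feature map phi
    kv = torch.einsum("bnd,bne->bde", k, v)          # (B, d, d_v): linear in N
    z = 1.0 / (torch.einsum("bnd,bd->bn", q, k.sum(dim=1)) + eps)
    return torch.einsum("bnd,bde,bn->bne", q, kv, z)


B, N, d = 2, 32 * 32 * 32, 64                        # N = voxels of a 32^3 volume
out = linear_attention(torch.randn(B, N, d), torch.randn(B, N, d),
                       torch.randn(B, N, d))
print(out.shape)  # torch.Size([2, 32768, 64])
```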
arXiv:2405.06166 (https://arxiv.org/abs/2405.06166; pdf: https://arxiv.org/pdf/2405.06166)
Subjects: eess.IV; cs.CV
Title: MDNet: Multi-Decoder Network for Abdominal CT Organs Segmentation
Authors: Debesh Jha, Nikhil Kumar Tomar, Koushik Biswas, Gorkem Durak, Matthew Antalek, Zheyuan Zhang, Bin Wang, Md Mostafijur Rahman, Hongyi Pan, Alpay Medetalibeyoglu, Yury Velichko, Daniela Ladner, Amir Borhani, Ulas Bagci
Abstract: Accurate segmentation of organs from abdominal CT scans is essential for clinical applications such as diagnosis, treatment planning, and patient monitoring. To handle the challenges of heterogeneity in organ shapes and sizes and complex anatomical relationships, we propose MDNet, an encoder-decoder network that uses the pre-trained MiT-B2 as the encoder together with multiple different decoder networks. Each decoder network is connected to a different part of the encoder via a multi-scale feature enhancement dilated block. With each decoder, we iteratively increase the depth of the network and refine the segmentation masks, enriching the feature maps by integrating the previous decoders' feature maps. To refine the feature maps further, we also feed the predicted masks from the previous decoder into the current decoder, providing spatial attention across foreground and background regions. MDNet effectively refines the segmentation mask, with high Dice similarity coefficients (DSC) of 0.9013 and 0.9169 on the Liver Tumor Segmentation (LiTS) and MSD Spleen datasets, respectively. It also reduces the Hausdorff distance (HD) to 3.79 for the LiTS dataset and 2.26 for the spleen segmentation dataset, underscoring MDNet's precision in capturing complex contours. Moreover, MDNet is more interpretable and robust than the other baseline models.
Submitted 9 May, 2024; originally announced May 2024.
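The mask-feedback idea, using the previous decoder's prediction as spatial attention over foreground and background for the next decoder, can be sketched as below. The gating form (sigmoid of the upsampled logits, with separate foreground/background streams) is an assumption; the abstract does not fix it.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F


class MaskGuidedAttention(nn.Module):
    """Reweight decoder features with the previous decoder's predicted mask,
    attending to both foreground (p) and background (1 - p) regions."""

    def __init__(self, channels: int):
        super().__init__()
        self.merge = nn.Conv2d(2 * channels, channels, 1)

    def forward(self, feats, prev_mask_logits):
        p = torch.sigmoid(F.interpolate(prev_mask_logits, size=feats.shape[-2:],
                                        mode="bilinear", align_corners=False))
        fg, bg = feats * p, feats * (1.0 - p)    # foreground / background streams
        return self.merge(torch.cat([fg, bg], dim=1))


attn = MaskGuidedAttention(channels=64)
out = attn(torch.randn(1, 64, 64, 64), torch.randn(1, 1, 32, 32))
print(out.shape)  # torch.Size([1, 64, 64, 64])
```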
arXiv:2405.01503 (https://arxiv.org/abs/2405.01503; pdf: https://arxiv.org/pdf/2405.01503)
Subjects: eess.IV; cs.CV
Title: PAM-UNet: Shifting Attention on Region of Interest in Medical Images
Authors: Abhijit Das, Debesh Jha, Vandan Gorade, Koushik Biswas, Hongyi Pan, Zheyuan Zhang, Daniela P. Ladner, Yury Velichko, Amir Borhani, Ulas Bagci
Abstract: Computer-aided segmentation methods can assist medical personnel in improving diagnostic outcomes. While recent advancements like UNet and its variants have shown promise, they face a critical challenge: balancing accuracy with computational efficiency. Shallow encoder architectures in UNets often struggle to capture crucial spatial features, leading to inaccurate and sparse segmentation. To address this limitation, we propose a novel Progressive Attention based Mobile UNet (PAM-UNet) architecture. The inverted residual (IR) blocks in PAM-UNet help maintain a lightweight framework, while layerwise Progressive Luong Attention (PLA) promotes precise segmentation by directing attention toward regions of interest during synthesis. Our approach prioritizes both accuracy and speed, achieving a commendable balance with a mean IoU of 74.65 and a Dice score of 82.87 while requiring only 1.32 GFLOPs on the Liver Tumor Segmentation Benchmark (LiTS) 2017 dataset.
These results highlight the importance of developing efficient segmentation models to accelerate the adoption of AI in clinical practice.
Submitted 2 May, 2024; originally announced May 2024.
Comments: Accepted at 2024 IEEE EMBC
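"Progressive Luong Attention" is not specified beyond its name in the abstract. Luong's multiplicative score, score(q, k) = q^T W k, applied over the spatial positions of a skip feature map, gives one plausible gate; the sketch below is that reading, with all names and shapes chosen for illustration.

```python
import torch
import torch.nn as nn


class LuongSpatialGate(nn.Module):
    """Multiplicative (Luong-style) attention of a decoder query vector over
    the spatial positions of an encoder skip feature map."""

    def __init__(self, channels: int):
        super().__init__()
        self.W = nn.Linear(channels, channels, bias=False)

    def forward(self, skip, query):              # skip: (B, C, H, W), query: (B, C)
        b, c, h, w = skip.shape
        keys = skip.flatten(2).transpose(1, 2)   # (B, HW, C)
        scores = torch.bmm(keys, self.W(query).unsqueeze(-1)).squeeze(-1)  # (B, HW)
        attn = torch.softmax(scores, dim=1).view(b, 1, h, w)
        return skip * attn                       # attention-weighted skip features


gate = LuongSpatialGate(channels=64)
out = gate(torch.randn(2, 64, 32, 32), torch.randn(2, 64))
print(out.shape)  # torch.Size([2, 64, 32, 32])
```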
arXiv:2404.17742 (https://arxiv.org/abs/2404.17742)
Subjects: eess.IV; cs.CV
Title: Segmentation Quality and Volumetric Accuracy in Medical Imaging
Authors: Zheyuan Zhang, Ulas Bagci
Abstract: Current medical image segmentation relies on region-based (Dice, F1-score) and boundary-based (Hausdorff distance, surface distance) metrics as the de facto standard. While these metrics are widely used, they lack a unified interpretation, particularly regarding volume agreement, and clinicians often lack clear benchmarks to gauge the "goodness" of segmentation results from these metrics alone. Recognizing the clinical relevance of volumetry, we utilize the relative volume prediction error (vpe) to directly assess the accuracy of volume predictions derived from segmentation tasks. Our work integrates theoretical analysis and empirical validation across diverse datasets. We examine the often-ambiguous relationship between segmentation quality (measured by Dice) and volumetric accuracy in clinical practice. Our findings highlight the critical role of incorporating volumetric prediction accuracy into segmentation evaluation. This approach empowers clinicians with a more nuanced understanding of segmentation performance, ultimately improving the interpretation and utility of these metrics in real-world healthcare settings.
Submitted 13 May, 2024; v1 submitted 26 April, 2024; originally announced April 2024.
Comments: The data used in the paper raises privacy issues for medical images, and some citations are missing.
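The vpe metric itself reduces to a one-liner: the signed relative error between predicted and reference volumes, computed from voxel counts and voxel spacing. A minimal sketch (the sign convention, pred minus ground truth over ground truth, is assumed here):

```python
import numpy as np


def relative_volume_prediction_error(pred_mask, gt_mask, spacing=(1.0, 1.0, 1.0)):
    """vpe = (V_pred - V_gt) / V_gt, with volumes from voxel counts x voxel size."""
    voxel = float(np.prod(spacing))              # physical volume per voxel
    v_pred = pred_mask.astype(bool).sum() * voxel
    v_gt = gt_mask.astype(bool).sum() * voxel
    return (v_pred - v_gt) / v_gt


gt = np.zeros((10, 10, 10))
gt[2:8, 2:8, 2:8] = 1            # 216 voxels
pred = np.zeros_like(gt)
pred[2:8, 2:8, 2:9] = 1          # 252 voxels: slight over-segmentation
print(round(relative_volume_prediction_error(pred, gt), 3))  # 0.167 (16.7% over)
```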
This study \textit{introduces a novel CT dataset sourced from 255 patients with pancreatic diseases, featuring annotated pancreas segmentation masks and corresponding diagnostic labe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.17064v1-abstract-full').style.display = 'inline'; document.getElementById('2404.17064v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.17064v1-abstract-full" style="display: none;"> Identifying peri-pancreatic edema is pivotal for assessing disease progression and prognosis, emphasizing the critical need for accurate detection and assessment in pancreatitis diagnosis and management. This study \textit{introduces a novel CT dataset sourced from 255 patients with pancreatic diseases, featuring annotated pancreas segmentation masks and corresponding diagnostic labels for peri-pancreatic edema condition}. With the novel dataset, we first evaluate the efficacy of the \textit{LinTransUNet} model, a linear Transformer based segmentation algorithm, to segment the pancreas accurately from CT imaging data. Then, we use segmented pancreas regions with two distinct machine learning classifiers to identify the presence of peri-pancreatic edema: deep learning-based models and a radiomics-based eXtreme Gradient Boosting (XGBoost). The LinTransUNet achieved promising results, with a dice coefficient of 80.85\% and mIoU of 68.73\%. Among the nine benchmarked classification models for peri-pancreatic edema detection, the \textit{Swin-Tiny} transformer model demonstrated the highest recall of $98.85 \pm 0.42$ and precision of $98.38\pm 0.17$. Comparatively, the radiomics-based XGBoost model achieved an accuracy of $79.61\pm4.04$ and recall of $91.05\pm3.28$, showcasing its potential as a supplementary diagnostic tool given its rapid processing speed and reduced training time. Our code is available \url{https://github.com/NUBagciLab/Peri-Pancreatic-Edema-Detection}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.17064v1-abstract-full').style.display = 'none'; document.getElementById('2404.17064v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.06961">arXiv:2403.06961</a> <span> [<a href="https://arxiv.org/pdf/2403.06961">pdf</a>, <a href="https://arxiv.org/format/2403.06961">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Explainable Transformer Prototypes for Medical Diagnoses </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Demir%2C+U">Ugur Demir</a>, <a href="/search/?searchtype=author&query=Jha%2C+D">Debesh Jha</a>, <a href="/search/?searchtype=author&query=Zhang%2C+Z">Zheyuan Zhang</a>, <a href="/search/?searchtype=author&query=Keles%2C+E">Elif Keles</a>, <a href="/search/?searchtype=author&query=Allen%2C+B">Bradley Allen</a>, <a href="/search/?searchtype=author&query=Katsaggelos%2C+A+K">Aggelos K.
Katsaggelos</a>, <a href="/search/?searchtype=author&query=Bagci%2C+U">Ulas Bagci</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.06961v1-abstract-short" style="display: inline;"> Deployments of artificial intelligence in medical diagnostics mandate not just accuracy and efficacy but also trust, emphasizing the need for explainability in machine decisions. The recent trend in automated medical image diagnostics leans towards the deployment of Transformer-based architectures, credited to their impressive capabilities. Since the self-attention feature of transformers contribu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.06961v1-abstract-full').style.display = 'inline'; document.getElementById('2403.06961v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.06961v1-abstract-full" style="display: none;"> Deployments of artificial intelligence in medical diagnostics mandate not just accuracy and efficacy but also trust, emphasizing the need for explainability in machine decisions. The recent trend in automated medical image diagnostics leans towards the deployment of Transformer-based architectures, credited to their impressive capabilities. Since the self-attention feature of transformers contributes towards identifying crucial regions during the classification process, it enhances the trustworthiness of these methods. However, the complex intricacies of these attention mechanisms may fall short of effectively pinpointing the regions of interest directly influencing AI decisions. Our research endeavors to innovate a unique attention block that underscores the correlation between 'regions' rather than 'pixels'. To address this challenge, we introduce an innovative system grounded in prototype learning, featuring an advanced self-attention mechanism that goes beyond conventional ad-hoc visual explanation techniques by offering comprehensible visual insights. A combined quantitative and qualitative methodological approach was used to demonstrate the effectiveness of the proposed method on the large-scale NIH chest X-ray dataset. Experimental results showed that our proposed method offers a promising direction for explainability, which can lead to the development of more trustworthy systems, facilitating easier and more rapid adoption of such technology into routine clinics. The code is available at www.github.com/NUBagcilab/r2r_proto. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.06961v1-abstract-full').style.display = 'none'; document.getElementById('2403.06961v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024.
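<p class="is-size-7">To make the region-level prototype attention described above concrete, the following is a minimal PyTorch sketch: patch tokens are pooled into coarser regions and attended to by learned prototypes. The pooling scheme, dimensions, and parameter names are illustrative assumptions of this note, not the authors' released code (see www.github.com/NUBagcilab/r2r_proto for that).</p> <pre><code>
import torch
import torch.nn as nn
import torch.nn.functional as F

class RegionPrototypeAttention(nn.Module):
    """Attention between learned prototypes and pooled 'regions' of patch
    tokens, rather than between individual patch/pixel tokens (a sketch)."""
    def __init__(self, dim=256, num_prototypes=16, region_size=4):
        super().__init__()
        # Learnable prototype vectors (assumed parameterization).
        self.prototypes = nn.Parameter(torch.randn(num_prototypes, dim))
        self.q, self.k, self.v = (nn.Linear(dim, dim) for _ in range(3))
        self.region_size = region_size

    def forward(self, tokens):
        # tokens: (B, N, dim) patch embeddings; N must divide by region_size.
        B, N, D = tokens.shape
        # Average-pool groups of patch tokens into "regions".
        regions = tokens.view(B, N // self.region_size, self.region_size, D).mean(2)
        q = self.q(self.prototypes).unsqueeze(0).expand(B, -1, -1)   # (B, P, D)
        k, v = self.k(regions), self.v(regions)                      # (B, R, D)
        attn = F.softmax(q @ k.transpose(1, 2) / D ** 0.5, dim=-1)   # (B, P, R)
        # attn is an interpretable prototype-to-region relevance map.
        return attn @ v, attn
</code></pre>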
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.05024">arXiv:2403.05024</a> <span> [<a href="https://arxiv.org/pdf/2403.05024">pdf</a>, <a href="https://arxiv.org/format/2403.05024">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> A Probabilistic Hadamard U-Net for MRI Bias Field Correction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhu%2C+X">Xin Zhu</a>, <a href="/search/?searchtype=author&query=Pan%2C+H">Hongyi Pan</a>, <a href="/search/?searchtype=author&query=Velichko%2C+Y">Yury Velichko</a>, <a href="/search/?searchtype=author&query=Murphy%2C+A+B">Adam B. Murphy</a>, <a href="/search/?searchtype=author&query=Ross%2C+A">Ashley Ross</a>, <a href="/search/?searchtype=author&query=Turkbey%2C+B">Baris Turkbey</a>, <a href="/search/?searchtype=author&query=Cetin%2C+A+E">Ahmet Enis Cetin</a>, <a href="/search/?searchtype=author&query=Bagci%2C+U">Ulas Bagci</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.05024v2-abstract-short" style="display: inline;"> Magnetic field inhomogeneity correction remains a challenging task in MRI analysis. Most established techniques are designed for brain MRI by assuming that image intensities within the same tissue follow a uniform distribution. Such an assumption cannot be easily applied to other organs, especially those that are small in size and heterogeneous in texture (large variations in intensity), such as… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.05024v2-abstract-full').style.display = 'inline'; document.getElementById('2403.05024v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.05024v2-abstract-full" style="display: none;"> Magnetic field inhomogeneity correction remains a challenging task in MRI analysis. Most established techniques are designed for brain MRI by assuming that image intensities within the same tissue follow a uniform distribution. Such an assumption cannot be easily applied to other organs, especially those that are small in size and heterogeneous in texture (large variations in intensity), such as the prostate. To address this problem, this paper proposes a probabilistic Hadamard U-Net (PHU-Net) for prostate MRI bias field correction. First, a novel Hadamard U-Net (HU-Net) is introduced to extract the low-frequency scalar field, multiplied by the original input to obtain the prototypical corrected image. HU-Net converts the input image from the spatial domain into the frequency domain via the Hadamard transform. In the frequency domain, high-frequency components are eliminated using a trainable filter (scaling layer), a hard-thresholding layer, and a sparsity penalty. Next, a conditional variational autoencoder is used to encode possible bias field-corrected variants into a low-dimensional latent space.
Random samples drawn from the latent space are then combined with the prototypical corrected image to generate multiple plausible images. Experimental results demonstrate the effectiveness of PHU-Net in correcting the bias field in prostate MRI with fast inference speed. It has also been shown that prostate MRI segmentation accuracy improves with the high-quality corrected images from PHU-Net. The code will be available in the final version of this manuscript. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.05024v2-abstract-full').style.display = 'none'; document.getElementById('2403.05024v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.10373">arXiv:2401.10373</a> <span> [<a href="https://arxiv.org/pdf/2401.10373">pdf</a>, <a href="https://arxiv.org/format/2401.10373">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Harmonized Spatial and Spectral Learning for Robust and Generalized Medical Image Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Gorade%2C+V">Vandan Gorade</a>, <a href="/search/?searchtype=author&query=Mittal%2C+S">Sparsh Mittal</a>, <a href="/search/?searchtype=author&query=Jha%2C+D">Debesh Jha</a>, <a href="/search/?searchtype=author&query=Singhal%2C+R">Rekha Singhal</a>, <a href="/search/?searchtype=author&query=Bagci%2C+U">Ulas Bagci</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.10373v2-abstract-short" style="display: inline;"> Deep learning has demonstrated remarkable achievements in medical image segmentation. However, prevailing deep learning models struggle with poor generalization due to (i) intra-class variations, where the same class appears differently in different samples, and (ii) inter-class independence, resulting in difficulties capturing intricate relationships between distinct objects, leading to higher fa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.10373v2-abstract-full').style.display = 'inline'; document.getElementById('2401.10373v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.10373v2-abstract-full" style="display: none;"> Deep learning has demonstrated remarkable achievements in medical image segmentation.
However, prevailing deep learning models struggle with poor generalization due to (i) intra-class variations, where the same class appears differently in different samples, and (ii) inter-class independence, resulting in difficulties capturing intricate relationships between distinct objects, leading to higher false negative cases. This paper presents a novel approach that synergizes spatial and spectral representations to enhance domain-generalized medical image segmentation. We introduce the innovative Spectral Correlation Coefficient objective to improve the model's capacity to capture middle-order features and contextual long-range dependencies. This objective complements traditional spatial objectives by incorporating valuable spectral information. Extensive experiments reveal that optimizing this objective with existing architectures like UNet and TransUNet significantly enhances generalization, interpretability, and noise robustness, producing more confident predictions. For instance, in cardiac segmentation, we observe a 0.81 pp and 1.63 pp (pp = percentage point) improvement in DSC over UNet and TransUNet, respectively. Our interpretability study demonstrates that, in most tasks, objectives optimized with UNet outperform even TransUNet by introducing global contextual information alongside local details. These findings underscore the versatility and effectiveness of our proposed method across diverse imaging modalities and medical domains. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.10373v2-abstract-full').style.display = 'none'; document.getElementById('2401.10373v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024.
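<p class="is-size-7">One plausible reading of a spectral-correlation objective like the one above is to compare prediction and ground truth in the Fourier domain and penalize decorrelation. The paper's exact formulation may differ; the log-magnitude spectra and the combination rule below are assumptions of this sketch.</p> <pre><code>
import torch

def spectral_correlation_loss(pred, target, eps=1e-8):
    # Pearson correlation between log-magnitude spectra of prediction and
    # target over the last two (spatial) dimensions; loss = 1 - correlation.
    fp = torch.fft.rfft2(pred.float()).abs().log1p().flatten(1)
    ft = torch.fft.rfft2(target.float()).abs().log1p().flatten(1)
    fp = fp - fp.mean(dim=1, keepdim=True)
    ft = ft - ft.mean(dim=1, keepdim=True)
    corr = (fp * ft).sum(1) / (fp.norm(dim=1) * ft.norm(dim=1) + eps)
    return (1.0 - corr).mean()

# Typically combined with a spatial term, e.g.:
#   loss = dice_loss(pred, target) + lam * spectral_correlation_loss(pred, target)
# where dice_loss and the weight lam are up to the user.
</code></pre>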
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Early Accepted at ICPR-2024 for Oral Presentation</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.09630">arXiv:2401.09630</a> <span> [<a href="https://arxiv.org/pdf/2401.09630">pdf</a>, <a href="https://arxiv.org/format/2401.09630">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> CT Liver Segmentation via PVT-based Encoding and Refined Decoding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Jha%2C+D">Debesh Jha</a>, <a href="/search/?searchtype=author&query=Tomar%2C+N+K">Nikhil Kumar Tomar</a>, <a href="/search/?searchtype=author&query=Biswas%2C+K">Koushik Biswas</a>, <a href="/search/?searchtype=author&query=Durak%2C+G">Gorkem Durak</a>, <a href="/search/?searchtype=author&query=Medetalibeyoglu%2C+A">Alpay Medetalibeyoglu</a>, <a href="/search/?searchtype=author&query=Antalek%2C+M">Matthew Antalek</a>, <a href="/search/?searchtype=author&query=Velichko%2C+Y">Yury Velichko</a>, <a href="/search/?searchtype=author&query=Ladner%2C+D">Daniela Ladner</a>, <a href="/search/?searchtype=author&query=Borhani%2C+A">Amir Borhani</a>, <a href="/search/?searchtype=author&query=Bagci%2C+U">Ulas Bagci</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.09630v3-abstract-short" style="display: inline;"> Accurate liver segmentation from CT scans is essential for effective diagnosis and treatment planning. Computer-aided diagnosis systems promise to improve the precision of liver disease diagnosis, disease progression, and treatment planning. In response to the need, we propose a novel deep learning approach, \textit{\textbf{PVTFormer}}, that is built upon a pretrained pyramid vision transformer (P… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.09630v3-abstract-full').style.display = 'inline'; document.getElementById('2401.09630v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.09630v3-abstract-full" style="display: none;"> Accurate liver segmentation from CT scans is essential for effective diagnosis and treatment planning. Computer-aided diagnosis systems promise to improve the precision of liver disease diagnosis, disease progression, and treatment planning. In response to the need, we propose a novel deep learning approach, \textit{\textbf{PVTFormer}}, that is built upon a pretrained pyramid vision transformer (PVT v2) combined with advanced residual upsampling and decoder block. By integrating a refined feature channel approach with a hierarchical decoding strategy, PVTFormer generates high quality segmentation masks by enhancing semantic features. 
Rigorous evaluation of the proposed method on Liver Tumor Segmentation Benchmark (LiTS) 2017 demonstrates that our proposed architecture not only achieves a high dice coefficient of 86.78\%, mIoU of 78.46\%, but also obtains a low HD of 3.50. The results underscore PVTFormer's efficacy in setting a new benchmark for state-of-the-art liver segmentation methods. The source code of the proposed PVTFormer is available at \url{https://github.com/DebeshJha/PVTFormer}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.09630v3-abstract-full').style.display = 'none'; document.getElementById('2401.09630v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.17040">arXiv:2312.17040</a> <span> [<a href="https://arxiv.org/pdf/2312.17040">pdf</a>, <a href="https://arxiv.org/format/2312.17040">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/s12145-023-01201-6">10.1007/s12145-023-01201-6 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> AI Powered Road Network Prediction with Multi-Modal Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Gengec%2C+N+E">Necip Enes Gengec</a>, <a href="/search/?searchtype=author&query=Tari%2C+E">Ergin Tari</a>, <a href="/search/?searchtype=author&query=Bagci%2C+U">Ulas Bagci</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.17040v1-abstract-short" style="display: inline;"> This study presents an innovative approach for automatic road detection with deep learning, by employing fusion strategies for utilizing both lower-resolution satellite imagery and GPS trajectory data, a concept never explored before. We rigorously investigate both early and late fusion strategies, and assess deep learning based road detection performance using different fusion settings. 
Our exten… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.17040v1-abstract-full').style.display = 'inline'; document.getElementById('2312.17040v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.17040v1-abstract-full" style="display: none;"> This study presents an innovative approach for automatic road detection with deep learning, by employing fusion strategies for utilizing both lower-resolution satellite imagery and GPS trajectory data, a concept never explored before. We rigorously investigate both early and late fusion strategies, and assess deep learning based road detection performance using different fusion settings. Our extensive ablation studies assess the efficacy of our framework under diverse model architectures, loss functions, and geographic domains (Istanbul and Montreal). For an unbiased and complete evaluation of road detection results, we use both region-based and boundary-based evaluation metrics for road segmentation. The outcomes reveal that the ResUnet model outperforms U-Net and D-Linknet in road extraction tasks, achieving superior results over the benchmark study using low-resolution Sentinel-2 data. This research not only contributes to the field of automatic road detection but also offers novel insights into the utilization of data fusion methods in diverse applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.17040v1-abstract-full').style.display = 'none'; document.getElementById('2312.17040v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. 
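<p class="is-size-7">The early- and late-fusion settings investigated above reduce to a simple wiring choice between the two modalities. A toy sketch follows; the tiny stand-in network and the logit-averaging rule are assumptions of this note, not the study's models.</p> <pre><code>
import torch
import torch.nn as nn

def make_segmenter(in_ch):
    # Stand-in for U-Net / ResUnet / D-LinkNet; outputs one road logit map.
    return nn.Sequential(nn.Conv2d(in_ch, 16, 3, padding=1), nn.ReLU(),
                         nn.Conv2d(16, 1, 1))

class EarlyFusion(nn.Module):
    # Concatenate satellite RGB and a rasterized GPS-density channel at input.
    def __init__(self):
        super().__init__()
        self.net = make_segmenter(in_ch=3 + 1)
    def forward(self, rgb, gps):  # rgb: (B,3,H,W), gps: (B,1,H,W)
        return self.net(torch.cat([rgb, gps], dim=1))

class LateFusion(nn.Module):
    # Run separate branches per modality and merge their predictions.
    def __init__(self):
        super().__init__()
        self.rgb_net, self.gps_net = make_segmenter(3), make_segmenter(1)
    def forward(self, rgb, gps):
        return 0.5 * (self.rgb_net(rgb) + self.gps_net(gps))
</code></pre>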
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.11480">arXiv:2312.11480</a> <span> [<a href="https://arxiv.org/pdf/2312.11480">pdf</a>, <a href="https://arxiv.org/format/2312.11480">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Smooth Activation for Improved Disease Diagnosis and Organ Segmentation from Radiology Scans </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Biswas%2C+K">Koushik Biswas</a>, <a href="/search/?searchtype=author&query=Jha%2C+D">Debesh Jha</a>, <a href="/search/?searchtype=author&query=Tomar%2C+N+K">Nikhil Kumar Tomar</a>, <a href="/search/?searchtype=author&query=Durak%2C+G">Gorkem Durak</a>, <a href="/search/?searchtype=author&query=Medetalibeyoglu%2C+A">Alpay Medetalibeyoglu</a>, <a href="/search/?searchtype=author&query=Antalek%2C+M">Matthew Antalek</a>, <a href="/search/?searchtype=author&query=Velichko%2C+Y">Yury Velichko</a>, <a href="/search/?searchtype=author&query=Ladner%2C+D">Daniela Ladner</a>, <a href="/search/?searchtype=author&query=Bohrani%2C+A">Amir Bohrani</a>, <a href="/search/?searchtype=author&query=Bagci%2C+U">Ulas Bagci</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.11480v1-abstract-short" style="display: inline;"> In this study, we propose a new activation function, called Adaptive Smooth Activation Unit (ASAU), tailored for optimized gradient propagation, thereby enhancing the proficiency of convolutional networks in medical image analysis. We apply this new activation function to two important and commonly used general tasks in medical image analysis: automatic disease diagnosis and organ segmentation in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.11480v1-abstract-full').style.display = 'inline'; document.getElementById('2312.11480v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.11480v1-abstract-full" style="display: none;"> In this study, we propose a new activation function, called Adaptive Smooth Activation Unit (ASAU), tailored for optimized gradient propagation, thereby enhancing the proficiency of convolutional networks in medical image analysis. We apply this new activation function to two important and commonly used general tasks in medical image analysis: automatic disease diagnosis and organ segmentation in CT and MRI. 
Our rigorous evaluation on the RadImageNet abdominal/pelvis (CT and MRI) dataset and Liver Tumor Segmentation Benchmark (LiTS) 2017 demonstrates that our ASAU-integrated frameworks not only achieve a substantial (4.80\%) improvement over ReLU in classification accuracy (disease detection) on abdominal CT and MRI but also achieve a 1\%-3\% improvement in dice coefficient compared to widely used activations for `healthy liver tissue' segmentation. These improvements offer new baselines for developing a diagnostic tool, particularly for complex, challenging pathologies. The superior performance and adaptability of ASAU highlight its potential for integration into a wide range of image classification and segmentation tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.11480v1-abstract-full').style.display = 'none'; document.getElementById('2312.11480v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.05634">arXiv:2312.05634</a> <span> [<a href="https://arxiv.org/pdf/2312.05634">pdf</a>, <a href="https://arxiv.org/format/2312.05634">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> PGDS: Pose-Guidance Deep Supervision for Mitigating Clothes-Changing in Person Re-Identification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Trinh%2C+Q">Quoc-Huy Trinh</a>, <a href="/search/?searchtype=author&query=Bui%2C+N">Nhat-Tan Bui</a>, <a href="/search/?searchtype=author&query=Hoang%2C+D">Dinh-Hieu Hoang</a>, <a href="/search/?searchtype=author&query=Thi%2C+P+V">Phuoc-Thao Vo Thi</a>, <a href="/search/?searchtype=author&query=Nguyen%2C+H">Hai-Dang Nguyen</a>, <a href="/search/?searchtype=author&query=Jha%2C+D">Debesh Jha</a>, <a href="/search/?searchtype=author&query=Bagci%2C+U">Ulas Bagci</a>, <a href="/search/?searchtype=author&query=Le%2C+N">Ngan Le</a>, <a href="/search/?searchtype=author&query=Tran%2C+M">Minh-Triet Tran</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.05634v3-abstract-short" style="display: inline;"> Person Re-Identification (Re-ID) task seeks to enhance the tracking of multiple individuals by surveillance cameras. It supports multimodal tasks, including text-based person retrieval and human matching. One of the most significant challenges faced in Re-ID is clothes-changing, where the same person may appear in different outfits.
While previous methods have made notable progress in maintaining… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.05634v3-abstract-full').style.display = 'inline'; document.getElementById('2312.05634v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.05634v3-abstract-full" style="display: none;"> Person Re-Identification (Re-ID) task seeks to enhance the tracking of multiple individuals by surveillance cameras. It supports multimodal tasks, including text-based person retrieval and human matching. One of the most significant challenges faced in Re-ID is clothes-changing, where the same person may appear in different outfits. While previous methods have made notable progress in maintaining clothing data consistency and handling clothing change data, they still rely excessively on clothing information, which can limit performance due to the dynamic nature of human appearances. To mitigate this challenge, we propose the Pose-Guidance Deep Supervision (PGDS), an effective framework for learning pose guidance within the Re-ID task. It consists of three modules: a human encoder, a pose encoder, and a Pose-to-Human Projection module (PHP). Our framework guides the human encoder, i.e., the main re-identification model, with pose information from the pose encoder through multiple layers via the knowledge transfer mechanism from the PHP module, helping the human encoder learn body parts information without increasing computation resources in the inference stage. Through extensive experiments, our method surpasses the performance of current state-of-the-art methods, demonstrating its robustness and effectiveness for real-world applications. Our code is available at https://github.com/huyquoctrinh/PGDS. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.05634v3-abstract-full').style.display = 'none'; document.getElementById('2312.05634v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. 
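<p class="is-size-7">A minimal sketch of the pose-guided deep supervision idea above: intermediate Re-ID features are projected and aligned with frozen pose-encoder features, and the pose branch is dropped at inference. The layer pairing, loss weight, and function names here are illustrative assumptions, not the PGDS implementation (see https://github.com/huyquoctrinh/PGDS for that).</p> <pre><code>
import torch
import torch.nn as nn
import torch.nn.functional as F

class PoseToHumanProjection(nn.Module):
    # Projects an intermediate Re-ID feature and aligns it with the matching
    # pose-encoder feature; the pose target is detached so only the Re-ID
    # branch receives gradients. Dimensions are illustrative.
    def __init__(self, human_dim, pose_dim):
        super().__init__()
        self.proj = nn.Linear(human_dim, pose_dim)

    def forward(self, human_feat, pose_feat):
        return F.mse_loss(self.proj(human_feat), pose_feat.detach())

def pgds_style_loss(id_logits, labels, human_feats, pose_feats,
                    php_modules, kt_weight=0.1):
    # Identity classification plus layer-wise knowledge-transfer terms;
    # kt_weight = 0.1 is an assumed value, not taken from the paper.
    kt = sum(php(h, p) for php, h, p in zip(php_modules, human_feats, pose_feats))
    return F.cross_entropy(id_logits, labels) + kt_weight * kt
</code></pre>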
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at AVSS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.16700">arXiv:2311.16700</a> <span> [<a href="https://arxiv.org/pdf/2311.16700">pdf</a>, <a href="https://arxiv.org/format/2311.16700">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Tissues and Organs">q-bio.TO</span> </div> </div> <p class="title is-5 mathjax"> Rethinking Intermediate Layers design in Knowledge Distillation for Kidney and Liver Tumor Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Gorade%2C+V">Vandan Gorade</a>, <a href="/search/?searchtype=author&query=Mittal%2C+S">Sparsh Mittal</a>, <a href="/search/?searchtype=author&query=Jha%2C+D">Debesh Jha</a>, <a href="/search/?searchtype=author&query=Bagci%2C+U">Ulas Bagci</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.16700v2-abstract-short" style="display: inline;"> Knowledge distillation (KD) has demonstrated remarkable success across various domains, but its application to medical imaging tasks, such as kidney and liver tumor segmentation, has encountered challenges. Many existing KD methods are not specifically tailored for these tasks. Moreover, prevalent KD methods often lack a careful consideration of `what' and `from where' to distill knowledge from th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.16700v2-abstract-full').style.display = 'inline'; document.getElementById('2311.16700v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.16700v2-abstract-full" style="display: none;"> Knowledge distillation (KD) has demonstrated remarkable success across various domains, but its application to medical imaging tasks, such as kidney and liver tumor segmentation, has encountered challenges. Many existing KD methods are not specifically tailored for these tasks. Moreover, prevalent KD methods often lack a careful consideration of `what' and `from where' to distill knowledge from the teacher to the student. This oversight may lead to issues like the accumulation of training bias within shallower student layers, potentially compromising the effectiveness of KD. To address these challenges, we propose Hierarchical Layer-selective Feedback Distillation (HLFD). HLFD strategically distills knowledge from a combination of middle layers to earlier layers and transfers final layer knowledge to intermediate layers at both the feature and pixel levels. This design allows the model to learn higher-quality representations from earlier layers, resulting in a robust and compact student model. Extensive quantitative evaluations reveal that HLFD outperforms existing methods by a significant margin. 
For example, in the kidney segmentation task, HLFD surpasses the student model (without KD) by over 10\%, significantly improving its focus on tumor-specific features. From a qualitative standpoint, the student model trained using HLFD excels at suppressing irrelevant information and can focus sharply on tumor-specific details, which opens a new pathway for more efficient and accurate diagnostic tools. Code is available \href{https://github.com/vangorade/RethinkingKD_ISBI24}{here}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.16700v2-abstract-full').style.display = 'none'; document.getElementById('2311.16700v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at ISBI-2024 for Oral Presentation</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.13069">arXiv:2311.13069</a> <span> [<a href="https://arxiv.org/pdf/2311.13069">pdf</a>, <a href="https://arxiv.org/format/2311.13069">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> FuseNet: Self-Supervised Dual-Path Network for Medical Image Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Kazerouni%2C+A">Amirhossein Kazerouni</a>, <a href="/search/?searchtype=author&query=Karimijafarbigloo%2C+S">Sanaz Karimijafarbigloo</a>, <a href="/search/?searchtype=author&query=Azad%2C+R">Reza Azad</a>, <a href="/search/?searchtype=author&query=Velichko%2C+Y">Yury Velichko</a>, <a href="/search/?searchtype=author&query=Bagci%2C+U">Ulas Bagci</a>, <a href="/search/?searchtype=author&query=Merhof%2C+D">Dorit Merhof</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.13069v1-abstract-short" style="display: inline;"> Semantic segmentation, a crucial task in computer vision, often relies on labor-intensive and costly annotated datasets for training. In response to this challenge, we introduce FuseNet, a dual-stream framework for self-supervised semantic segmentation that eliminates the need for manual annotation. FuseNet leverages the shared semantic dependencies between the original and augmented images to cre… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.13069v1-abstract-full').style.display = 'inline'; document.getElementById('2311.13069v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.13069v1-abstract-full" style="display: none;"> Semantic segmentation, a crucial task in computer vision, often relies on labor-intensive and costly annotated datasets for training. 
In response to this challenge, we introduce FuseNet, a dual-stream framework for self-supervised semantic segmentation that eliminates the need for manual annotation. FuseNet leverages the shared semantic dependencies between the original and augmented images to create a clustering space, effectively assigning pixels to semantically related clusters, and ultimately generating the segmentation map. Additionally, FuseNet incorporates a cross-modal fusion technique that extends the principles of CLIP by replacing textual data with augmented images. This approach enables the model to learn complex visual representations, enhancing robustness against variations similar to CLIP's text invariance. To further improve edge alignment and spatial consistency between neighboring pixels, we introduce an edge refinement loss. This loss function considers edge information to enhance spatial coherence, facilitating the grouping of nearby pixels with similar visual features. Extensive experiments on skin lesion and lung segmentation datasets demonstrate the effectiveness of our method. \href{https://github.com/xmindflow/FuseNet}{Codebase.} <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.13069v1-abstract-full').style.display = 'none'; document.getElementById('2311.13069v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.12617">arXiv:2311.12617</a> <span> [<a href="https://arxiv.org/pdf/2311.12617">pdf</a>, <a href="https://arxiv.org/format/2311.12617">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Leveraging Unlabeled Data for 3D Medical Image Segmentation through Self-Supervised Contrastive Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Karimijafarbigloo%2C+S">Sanaz Karimijafarbigloo</a>, <a href="/search/?searchtype=author&query=Azad%2C+R">Reza Azad</a>, <a href="/search/?searchtype=author&query=Velichko%2C+Y">Yury Velichko</a>, <a href="/search/?searchtype=author&query=Bagci%2C+U">Ulas Bagci</a>, <a href="/search/?searchtype=author&query=Merhof%2C+D">Dorit Merhof</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.12617v1-abstract-short" style="display: inline;"> Current 3D semi-supervised segmentation methods face significant challenges such as limited consideration of contextual information and the inability to generate reliable pseudo-labels for effective unsupervised data use. 
To address these challenges, we introduce two distinct subnetworks designed to explore and exploit the discrepancies between them, ultimately correcting the erroneous prediction… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.12617v1-abstract-full').style.display = 'inline'; document.getElementById('2311.12617v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.12617v1-abstract-full" style="display: none;"> Current 3D semi-supervised segmentation methods face significant challenges such as limited consideration of contextual information and the inability to generate reliable pseudo-labels for effective unsupervised data use. To address these challenges, we introduce two distinct subnetworks designed to explore and exploit the discrepancies between them, ultimately correcting the erroneous prediction results. More specifically, we identify regions of inconsistent predictions and initiate a targeted verification training process. This procedure strategically fine-tunes and harmonizes the predictions of the subnetworks, leading to enhanced utilization of contextual information. Furthermore, to adaptively fine-tune the network's representational capacity and reduce prediction uncertainty, we employ a self-supervised contrastive learning paradigm. For this, we use the network's confidence to distinguish between reliable and unreliable predictions. The model is then trained to effectively minimize unreliable predictions. Our experimental results for organ segmentation, obtained from clinical MRI and CT scans, demonstrate the effectiveness of our approach when compared to state-of-the-art methods. The codebase is accessible on \href{https://github.com/xmindflow/SSL-contrastive}{GitHub}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.12617v1-abstract-full').style.display = 'none'; document.getElementById('2311.12617v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. 
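<p class="is-size-7">The confidence-based split between reliable and unreliable predictions described above can be illustrated in a few lines. The threshold value and the cross-teaching form below are assumptions of this sketch, not the paper's exact procedure.</p> <pre><code>
import torch
import torch.nn.functional as F

def cross_pseudo_loss(logits_a, logits_b, tau=0.9):
    # Subnetwork B produces pseudo-labels for subnetwork A; only voxels
    # where B's confidence exceeds tau (assumed threshold) are kept.
    prob_b = F.softmax(logits_b, dim=1)
    conf, pseudo = prob_b.max(dim=1)          # per-voxel confidence and label
    mask = (conf > tau).float()               # reliable-prediction mask
    loss = F.cross_entropy(logits_a, pseudo, reduction="none")
    return (loss * mask).sum() / mask.sum().clamp(min=1.0)
</code></pre>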
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.12486">arXiv:2311.12486</a> <span> [<a href="https://arxiv.org/pdf/2311.12486">pdf</a>, <a href="https://arxiv.org/format/2311.12486">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> HCA-Net: Hierarchical Context Attention Network for Intervertebral Disc Semantic Labeling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Bozorgpour%2C+A">Afshin Bozorgpour</a>, <a href="/search/?searchtype=author&query=Azad%2C+B">Bobby Azad</a>, <a href="/search/?searchtype=author&query=Azad%2C+R">Reza Azad</a>, <a href="/search/?searchtype=author&query=Velichko%2C+Y">Yury Velichko</a>, <a href="/search/?searchtype=author&query=Bagci%2C+U">Ulas Bagci</a>, <a href="/search/?searchtype=author&query=Merhof%2C+D">Dorit Merhof</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.12486v1-abstract-short" style="display: inline;"> Accurate and automated segmentation of intervertebral discs (IVDs) in medical images is crucial for assessing spine-related disorders, such as osteoporosis, vertebral fractures, or IVD herniation. We present HCA-Net, a novel contextual attention network architecture for semantic labeling of IVDs, with a special focus on exploiting prior geometric information. Our approach excels at processing feat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.12486v1-abstract-full').style.display = 'inline'; document.getElementById('2311.12486v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.12486v1-abstract-full" style="display: none;"> Accurate and automated segmentation of intervertebral discs (IVDs) in medical images is crucial for assessing spine-related disorders, such as osteoporosis, vertebral fractures, or IVD herniation. We present HCA-Net, a novel contextual attention network architecture for semantic labeling of IVDs, with a special focus on exploiting prior geometric information. Our approach excels at processing features across different scales and effectively consolidating them to capture the intricate spatial relationships within the spinal cord. To achieve this, HCA-Net models IVD labeling as a pose estimation problem, aiming to minimize the discrepancy between each predicted IVD location and its corresponding actual joint location. In addition, we introduce a skeletal loss term to reinforce the model's geometric dependence on the spine. This loss function is designed to constrain the model's predictions to a range that matches the general structure of the human vertebral skeleton. As a result, the network learns to reduce the occurrence of false predictions and adaptively improves the accuracy of IVD location estimation. Through extensive experimental evaluation on multi-center spine datasets, our approach consistently outperforms previous state-of-the-art methods on both MRI T1w and T2w modalities. The codebase is accessible to the public on \href{https://github.com/xmindflow/HCA-Net}{GitHub}. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.12486v1-abstract-full').style.display = 'none'; document.getElementById('2311.12486v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.18846">arXiv:2310.18846</a> <span> [<a href="https://arxiv.org/pdf/2310.18846">pdf</a>, <a href="https://arxiv.org/format/2310.18846">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> INCODE: Implicit Neural Conditioning with Prior Knowledge Embeddings </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Kazerouni%2C+A">Amirhossein Kazerouni</a>, <a href="/search/?searchtype=author&query=Azad%2C+R">Reza Azad</a>, <a href="/search/?searchtype=author&query=Hosseini%2C+A">Alireza Hosseini</a>, <a href="/search/?searchtype=author&query=Merhof%2C+D">Dorit Merhof</a>, <a href="/search/?searchtype=author&query=Bagci%2C+U">Ulas Bagci</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.18846v1-abstract-short" style="display: inline;"> Implicit Neural Representations (INRs) have revolutionized signal representation by leveraging neural networks to provide continuous and smooth representations of complex data. However, existing INRs face limitations in capturing fine-grained details, handling noise, and adapting to diverse signal types. To address these challenges, we introduce INCODE, a novel approach that enhances the control o… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.18846v1-abstract-full').style.display = 'inline'; document.getElementById('2310.18846v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.18846v1-abstract-full" style="display: none;"> Implicit Neural Representations (INRs) have revolutionized signal representation by leveraging neural networks to provide continuous and smooth representations of complex data. However, existing INRs face limitations in capturing fine-grained details, handling noise, and adapting to diverse signal types. To address these challenges, we introduce INCODE, a novel approach that enhances the control of the sinusoidal-based activation function in INRs using deep prior knowledge. INCODE comprises a harmonizer network and a composer network, where the harmonizer network dynamically adjusts key parameters of the activation function. Through a task-specific pre-trained model, INCODE adapts the task-specific parameters to optimize the representation process. Our approach not only excels in representation, but also extends its prowess to tackle complex tasks such as audio, image, and 3D shape reconstructions, as well as intricate challenges such as neural radiance fields (NeRFs), and inverse problems, including denoising, super-resolution, inpainting, and CT reconstruction. 
Through comprehensive experiments, INCODE demonstrates its superiority in terms of robustness, accuracy, quality, and convergence rate, broadening the scope of signal representation. Please visit the project's website for details on the proposed method and access to the code. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.18846v1-abstract-full').style.display = 'none'; document.getElementById('2310.18846v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at WACV 2024 conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.17764">arXiv:2310.17764</a> <span> [<a href="https://arxiv.org/pdf/2310.17764">pdf</a>, <a href="https://arxiv.org/format/2310.17764">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> SynergyNet: Bridging the Gap between Discrete and Continuous Representations for Precise Medical Image Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Gorade%2C+V">Vandan Gorade</a>, <a href="/search/?searchtype=author&query=Mittal%2C+S">Sparsh Mittal</a>, <a href="/search/?searchtype=author&query=Jha%2C+D">Debesh Jha</a>, <a href="/search/?searchtype=author&query=Bagci%2C+U">Ulas Bagci</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.17764v1-abstract-short" style="display: inline;"> In recent years, continuous latent space (CLS) and discrete latent space (DLS) deep learning models have been proposed for medical image analysis for improved performance. However, these models encounter distinct challenges. CLS models capture intricate details but often lack interpretability in terms of structural representation and robustness due to their emphasis on low-level features. Converse… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.17764v1-abstract-full').style.display = 'inline'; document.getElementById('2310.17764v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.17764v1-abstract-full" style="display: none;"> In recent years, continuous latent space (CLS) and discrete latent space (DLS) deep learning models have been proposed for medical image analysis for improved performance. However, these models encounter distinct challenges. CLS models capture intricate details but often lack interpretability in terms of structural representation and robustness due to their emphasis on low-level features. Conversely, DLS models offer interpretability, robustness, and the ability to capture coarse-grained information thanks to their structured latent space. However, DLS models have limited efficacy in capturing fine-grained details. 
To address the limitations of both DLS and CLS models, we propose SynergyNet, a novel bottleneck architecture designed to enhance existing encoder-decoder segmentation frameworks. SynergyNet seamlessly integrates discrete and continuous representations to harness complementary information and successfully preserves both fine and coarse-grained details in the learned representations. Our extensive experiments on multi-organ segmentation and cardiac datasets demonstrate that SynergyNet outperforms other state-of-the-art methods, including TransUNet, with dice scores improving by 2.16% and Hausdorff scores improving by 11.13%. When evaluating skin lesion and brain tumor segmentation datasets, we observe a remarkable improvement of 1.71% in Intersection-over-Union scores for skin lesion segmentation and of 8.58% for brain tumor segmentation. Our innovative approach paves the way for enhancing the overall performance and capabilities of deep learning models in the critical domain of medical image analysis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.17764v1-abstract-full').style.display = 'none'; document.getElementById('2310.17764v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at WACV 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.12868">arXiv:2310.12868</a> <span> [<a href="https://arxiv.org/pdf/2310.12868">pdf</a>, <a href="https://arxiv.org/format/2310.12868">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> EMIT-Diff: Enhancing Medical Image Segmentation via Text-Guided Diffusion Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhang%2C+Z">Zheyuan Zhang</a>, <a href="/search/?searchtype=author&query=Yao%2C+L">Lanhong Yao</a>, <a href="/search/?searchtype=author&query=Wang%2C+B">Bin Wang</a>, <a href="/search/?searchtype=author&query=Jha%2C+D">Debesh Jha</a>, <a href="/search/?searchtype=author&query=Keles%2C+E">Elif Keles</a>, <a href="/search/?searchtype=author&query=Medetalibeyoglu%2C+A">Alpay Medetalibeyoglu</a>, <a href="/search/?searchtype=author&query=Bagci%2C+U">Ulas Bagci</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.12868v1-abstract-short" style="display: inline;"> Large-scale, big-variant, and high-quality data are crucial for developing robust and successful deep-learning models for medical applications since they potentially enable better generalization performance and avoid overfitting. However, the scarcity of high-quality labeled data always presents significant challenges.
arXiv:2310.12868 [pdf, other] (cs.CV)
EMIT-Diff: Enhancing Medical Image Segmentation via Text-Guided Diffusion Model
Authors: Zheyuan Zhang, Lanhong Yao, Bin Wang, Debesh Jha, Elif Keles, Alpay Medetalibeyoglu, Ulas Bagci
Abstract: Large-scale, big-variant, and high-quality data are crucial for developing robust and successful deep-learning models for medical applications, since they potentially enable better generalization performance and avoid overfitting. However, the scarcity of high-quality labeled data always presents significant challenges. This paper proposes a novel approach to address this challenge by developing controllable diffusion models for medical image synthesis, called EMIT-Diff. We leverage recent diffusion probabilistic models to generate realistic and diverse synthetic medical image data that preserve the essential characteristics of the original medical images, by incorporating edge information of objects to guide the synthesis process. In our approach, we ensure that the synthesized samples adhere to medically relevant constraints and preserve the underlying structure of the imaging data. Due to the random sampling process of the diffusion model, we can generate an arbitrary number of synthetic images with diverse appearances. To validate the effectiveness of the proposed method, we conduct extensive medical image segmentation experiments on multiple datasets, including Ultrasound breast (+13.87%), CT spleen (+0.38%), and MRI prostate (+7.78%), achieving significant improvements over the baseline segmentation methods. To the best of our knowledge, these promising results are the first to demonstrate a text-guided diffusion model for general medical image segmentation tasks. With carefully designed ablation experiments, we investigate the influence of various data augmentation ratios, hyper-parameter settings, patch sizes for generating random merging masks, and the combined influence with different network architectures.
Submitted 19 October, 2023; originally announced October 2023.
Comments: 15 pages
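As a rough illustration of the edge guidance mentioned above, the sketch below derives a binary edge map that a conditional diffusion model could take as an extra input channel. The abstract does not specify EMIT-Diff's edge extractor; the Sobel operator and the threshold value here are assumptions made purely for the demo.

```python
# Hedged sketch: one plausible way to derive the edge map that an edge-guided
# diffusion model could condition on. EMIT-Diff's exact edge extractor is not
# specified in this abstract; Sobel is used here purely for illustration.
import numpy as np
from scipy import ndimage

def edge_condition(image: np.ndarray, thresh: float = 0.2) -> np.ndarray:
    """Return a binary edge map to be fed to the generator as guidance."""
    gx = ndimage.sobel(image, axis=0)
    gy = ndimage.sobel(image, axis=1)
    mag = np.hypot(gx, gy)
    mag /= mag.max() + 1e-8                 # normalize to [0, 1]
    return (mag > thresh).astype(np.float32)

img = np.random.rand(128, 128)   # stand-in for a medical image slice
cond = edge_condition(img)       # guidance channel for the diffusion model
```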
arXiv:2310.10126 [pdf, other] (cs.LG, cs.AI)
A Non-monotonic Smooth Activation Function
Authors: Koushik Biswas, Meghana Karri, Ulaş Bağcı
Abstract: Activation functions are crucial in deep learning models since they introduce non-linearity into the networks, allowing them to learn from errors and make adjustments, which is essential for learning complex patterns. Their essential purpose is to transform unprocessed input signals into significant output activations, promoting information transmission throughout the neural network. In this study, we propose a new activation function called Sqish, a non-monotonic and smooth alternative to existing ones. We show its superiority in classification, object detection, segmentation, and adversarial-robustness experiments: an 8.21% improvement over ReLU on the CIFAR-100 dataset with the ShuffleNet V2 model under the FGSM adversarial attack, and a 5.87% improvement over ReLU on CIFAR-100 image classification with the same model.
Submitted 16 October, 2023; originally announced October 2023.
Comments: 12 pages
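For reference, the FGSM attack cited in the robustness results is the standard one-step method of Goodfellow et al.: perturb the input by epsilon times the sign of the loss gradient. The sketch below is that textbook formulation; the model and data are placeholders, and Sqish itself is not defined in this abstract, so it is not shown.

```python
# Standard one-step Fast Gradient Sign Method: x_adv = x + eps * sign(dL/dx).
# The model here is a throwaway placeholder, not the paper's network.
import torch
import torch.nn.functional as F

def fgsm(model, x, y, eps=8 / 255):
    """Return an adversarial copy of x under the cross-entropy loss."""
    x = x.clone().detach().requires_grad_(True)
    loss = F.cross_entropy(model(x), y)
    loss.backward()
    x_adv = x + eps * x.grad.sign()       # single signed-gradient step
    return x_adv.clamp(0, 1).detach()     # keep pixels in valid range

model = torch.nn.Sequential(torch.nn.Flatten(), torch.nn.Linear(3 * 32 * 32, 100))
x_adv = fgsm(model, torch.rand(8, 3, 32, 32), torch.randint(0, 100, (8,)))
```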
arXiv:2310.01413 [pdf] (eess.IV, cs.AI, cs.CV)
A multi-institutional pediatric dataset of clinical radiology MRIs by the Children's Brain Tumor Network
Authors: Ariana M. Familiar, Anahita Fathi Kazerooni, Hannah Anderson, Aliaksandr Lubneuski, Karthik Viswanathan, Rocky Breslow, Nastaran Khalili, Sina Bagheri, Debanjan Haldar, Meen Chul Kim, Sherjeel Arif, Rachel Madhogarhia, Thinh Q. Nguyen, Elizabeth A. Frenkel, Zeinab Helili, Jessica Harrison, Keyvan Farahani, Marius George Linguraru, Ulas Bagci, Yury Velichko, Jeffrey Stevens, Sarah Leary, Robert M. Lober, Stephani Campion, Amy A. Smith, et al. (15 additional authors not shown)
Abstract: Pediatric brain and spinal cancers remain the leading cause of cancer-related death in children. Advancements in clinical decision support in pediatric neuro-oncology utilizing the wealth of radiology imaging data collected through standard care, however, have significantly lagged other domains. Such data are ripe for use with predictive analytics such as artificial intelligence (AI) methods, which require large datasets. To address this unmet need, we provide a multi-institutional, large-scale pediatric dataset of 23,101 multi-parametric MRI exams acquired through routine care for 1,526 brain tumor patients, as part of the Children's Brain Tumor Network. This includes longitudinal MRIs across various cancer diagnoses, with associated patient-level clinical information, digital pathology slides, as well as tissue genotype and omics data. To facilitate downstream analysis, treatment-naïve images for 370 subjects were processed and released through the NCI Childhood Cancer Data Initiative via the Cancer Data Service. Through ongoing efforts to continuously build these imaging repositories, our aim is to accelerate discovery and translational AI models with real-world data, to ultimately empower precision medicine for children.
Submitted 2 October, 2023; originally announced October 2023.
arXiv:2309.09866 [pdf, other] (eess.IV, cs.LG)
Domain Generalization with Fourier Transform and Soft Thresholding
Authors: Hongyi Pan, Bin Wang, Zheyuan Zhang, Xin Zhu, Debesh Jha, Ahmet Enis Cetin, Concetto Spampinato, Ulas Bagci
Abstract: Domain generalization aims to train models on multiple source domains so that they generalize well to unseen target domains. Among many domain generalization methods, Fourier-transform-based ones have gained popularity primarily because they exploit the power of the Fourier transform to capture essential patterns and regularities in the data, making the model more robust to domain shifts. The mainstream Fourier-transform-based domain generalization swaps the Fourier amplitude spectrum while preserving the phase spectrum between the source and the target images. However, it neglects background interference in the amplitude spectrum. To overcome this limitation, we introduce a soft-thresholding function in the Fourier domain. We apply this newly designed algorithm to retinal fundus image segmentation, which is important for diagnosing ocular diseases but where performance can degrade across data sources due to domain shifts. The proposed technique enhances fundus image augmentation by eliminating small values in the Fourier domain, providing better generalization. Fusing soft thresholding with Fourier-transform-based domain generalization improves neural network performance by significantly reducing the target images' background interference. Experiments on public data validate our approach's effectiveness over conventional and state-of-the-art methods, with superior segmentation metrics.
Submitted 12 December, 2023; v1 submitted 18 September, 2023; originally announced September 2023.
Comments: Accepted to ICASSP 2024
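The two ingredients the abstract names, amplitude swapping and soft thresholding, can be sketched compactly. The code below follows the mainstream recipe (take the amplitude spectrum from another domain, keep the source phase) and applies the usual soft-thresholding shrinkage sign(a) * max(|a| - t, 0) to the amplitude; the paper's exact threshold rule and mixing schedule are not given in this abstract, so treat those details as assumptions.

```python
# Sketch of Fourier-domain augmentation with soft thresholding, under my own
# naming. The threshold value and the plain amplitude swap are assumptions;
# only the two building blocks themselves come from the abstract.
import numpy as np

def soft_threshold(a: np.ndarray, t: float) -> np.ndarray:
    """Shrink values toward zero: sign(a) * max(|a| - t, 0)."""
    return np.sign(a) * np.maximum(np.abs(a) - t, 0.0)

def fourier_augment(src: np.ndarray, tgt: np.ndarray, t: float = 0.1) -> np.ndarray:
    """Use the (thresholded) target amplitude with the source phase."""
    F_src, F_tgt = np.fft.fft2(src), np.fft.fft2(tgt)
    amp = soft_threshold(np.abs(F_tgt), t)  # suppress small background terms
    phase = np.angle(F_src)                 # structure lives in the phase
    return np.real(np.fft.ifft2(amp * np.exp(1j * phase)))

src, tgt = np.random.rand(64, 64), np.random.rand(64, 64)
aug = fourier_augment(src, tgt)             # augmented source-domain image
```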
arXiv:2309.05857 [pdf, other] (eess.IV, cs.CV)
Radiomics Boosts Deep Learning Model for IPMN Classification
Authors: Lanhong Yao, Zheyuan Zhang, Ugur Demir, Elif Keles, Camila Vendrami, Emil Agarunov, Candice Bolan, Ivo Schoots, Marc Bruno, Rajesh Keswani, Frank Miller, Tamas Gonda, Cemal Yazici, Temel Tirkes, Michael Wallace, Concetto Spampinato, Ulas Bagci
Abstract: Intraductal Papillary Mucinous Neoplasm (IPMN) cysts are pre-malignant pancreas lesions that can progress into pancreatic cancer. Therefore, detecting and stratifying their risk level is of utmost importance for effective treatment planning and disease control. However, this is a highly challenging task because of the diverse and irregular shape, texture, and size of the IPMN cysts as well as of the pancreas itself. In this study, we propose a novel computer-aided diagnosis pipeline for IPMN risk classification from multi-contrast MRI scans. Our analysis framework includes an efficient volumetric self-adapting segmentation strategy for pancreas delineation, followed by a newly designed deep-learning-based classification scheme combined with a radiomics-based predictive approach. In a series of rigorous experiments on multi-center datasets of 246 multi-contrast MRI scans from five centers, our decision-fusion model obtains performance superior to the state of the art (SOTA) in this field (81.9% vs. 61.3% accuracy against international guidelines and published studies). Our ablation studies demonstrate the significance of both the radiomics and deep learning modules for achieving this new SOTA performance. The findings have important implications for clinical decision-making.
Submitted 11 September, 2023; originally announced September 2023.
Comments: 10 pages, MICCAI MLMI 2023
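A minimal sketch of what a decision fusion of the two branches could look like follows; the weighted-average rule and the three risk levels are assumptions for illustration, not the authors' actual fusion scheme.

```python
# Minimal late-fusion sketch. The paper fuses a radiomics-based predictor
# with a deep classifier; the weighting below is an assumption for the demo.
import numpy as np

def fuse(p_deep: np.ndarray, p_radiomics: np.ndarray, w: float = 0.5) -> np.ndarray:
    """Weighted average of class-probability vectors from the two branches."""
    p = w * p_deep + (1.0 - w) * p_radiomics
    return p.argmax(axis=-1)             # fused IPMN risk class per scan

p_dl = np.array([[0.2, 0.5, 0.3]])      # deep model posteriors (3 risk levels)
p_rad = np.array([[0.1, 0.3, 0.6]])     # radiomics model posteriors
print(fuse(p_dl, p_rad))                # -> [2]
```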
arXiv:2309.00143 [pdf, other] (cs.CV)
Self-supervised Semantic Segmentation: Consistency over Transformation
Authors: Sanaz Karimijafarbigloo, Reza Azad, Amirhossein Kazerouni, Yury Velichko, Ulas Bagci, Dorit Merhof
Abstract: Accurate medical image segmentation is of utmost importance for enabling automated clinical decision procedures. However, prevailing supervised deep learning approaches for medical image segmentation encounter significant challenges due to their heavy dependence on extensive labeled training data. To tackle this issue, we propose a novel self-supervised algorithm, S³-Net, which integrates a robust framework based on the proposed Inception Large Kernel Attention (I-LKA) modules. This architectural enhancement makes it possible to comprehensively capture contextual information while preserving local intricacies, thereby enabling precise semantic segmentation. Furthermore, considering that lesions in medical images often exhibit deformations, we leverage deformable convolution as an integral component to effectively capture and delineate lesion deformations for superior object boundary definition. Additionally, our self-supervised strategy emphasizes the acquisition of invariance to affine transformations, which are commonly encountered in medical scenarios; this emphasis on robustness to geometric distortions significantly enhances the model's ability to handle such distortions. To enforce spatial consistency and promote the grouping of spatially connected pixels with similar feature representations, we introduce a spatial consistency loss term, which aids the network in capturing the relationships among neighboring pixels and enhances the overall segmentation quality. S³-Net iteratively learns pixel-level feature representations for image content clustering in an end-to-end manner. Our experimental results on skin lesion and lung organ segmentation tasks show the superior performance of our method compared to SOTA approaches. Code: https://github.com/mindflow-institue/SSCT
Submitted 31 August, 2023; originally announced September 2023.
Comments: Accepted in ICCV 2023 workshop CVAMD
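The "consistency over transformation" idea can be sketched as an equivariance penalty: predictions on a transformed image should match the transformed predictions. The toy example below uses a 90-degree rotation in place of a general affine transform and is not S³-Net's exact loss; all names are my own.

```python
# Sketch of a transformation-consistency objective in the spirit of the
# abstract. A 90-degree rotation stands in for a general affine transform.
import torch
import torch.nn.functional as F

def consistency_loss(model, x):
    t = lambda img: torch.rot90(img, k=1, dims=(-2, -1))  # toy "affine" map
    pred = model(x)                  # (B, C, H, W) pixel-wise features/logits
    pred_t = model(t(x))             # predict on the transformed input
    return F.mse_loss(pred_t, t(pred))  # penalize equivariance mismatch

net = torch.nn.Conv2d(1, 8, 3, padding=1)        # placeholder backbone
loss = consistency_loss(net, torch.randn(2, 1, 32, 32))
```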
arXiv:2309.00121 [pdf, other] (cs.CV)
Beyond Self-Attention: Deformable Large Kernel Attention for Medical Image Segmentation
Authors: Reza Azad, Leon Niggemeier, Michael Huttemann, Amirhossein Kazerouni, Ehsan Khodapanah Aghdam, Yury Velichko, Ulas Bagci, Dorit Merhof
Abstract: Medical image segmentation has seen significant improvements with transformer models, which excel at grasping far-reaching contexts and global contextual information. However, the increasing computational demands of these models, proportional to the squared token count, limit their depth and resolution capabilities. Most current methods process 3D volumetric image data slice-by-slice (called pseudo-3D), missing crucial inter-slice information and thus reducing the model's overall performance. To address these challenges, we introduce the concept of Deformable Large Kernel Attention (D-LKA Attention), a streamlined attention mechanism employing large convolution kernels to fully appreciate volumetric context. This mechanism operates within a receptive field akin to self-attention while sidestepping the computational overhead. Additionally, our proposed attention mechanism benefits from deformable convolutions to flexibly warp the sampling grid, enabling the model to adapt appropriately to diverse data patterns. We designed both 2D and 3D adaptations of D-LKA Attention, with the latter excelling in cross-depth data understanding. Together, these components shape our novel hierarchical Vision Transformer architecture, D-LKA Net. Evaluations of our model against leading methods on popular medical segmentation datasets (Synapse, NIH Pancreas, and Skin lesion) demonstrate its superior performance. Code: https://github.com/mindflow-institue/deformableLKA
Submitted 31 August, 2023; originally announced September 2023.
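For context, plain Large Kernel Attention (LKA) decomposes a large convolution into a depthwise convolution, a dilated depthwise convolution, and a pointwise convolution, and uses the result as a multiplicative attention map. The sketch below shows that base mechanism only; the deformable sampling grid that D-LKA adds on top is omitted here, so this is background, not the paper's module.

```python
# Sketch of plain Large Kernel Attention: a 5x5 depthwise conv, a dilated 7x7
# depthwise conv (effective kernel 19x19), and a 1x1 conv, whose output gates
# the input. The deformable part of D-LKA is intentionally left out.
import torch
import torch.nn as nn

class LKA(nn.Module):
    def __init__(self, dim: int):
        super().__init__()
        self.local = nn.Conv2d(dim, dim, 5, padding=2, groups=dim)
        self.spatial = nn.Conv2d(dim, dim, 7, padding=9, dilation=3, groups=dim)
        self.channel = nn.Conv2d(dim, dim, 1)

    def forward(self, x):
        attn = self.channel(self.spatial(self.local(x)))
        return x * attn            # gate the input with the attention map

y = LKA(32)(torch.randn(1, 32, 64, 64))   # shape preserved: (1, 32, 64, 64)
```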
arXiv:2309.00108 [pdf, other] (cs.CV)
Laplacian-Former: Overcoming the Limitations of Vision Transformers in Local Texture Detection
Authors: Reza Azad, Amirhossein Kazerouni, Babak Azad, Ehsan Khodapanah Aghdam, Yury Velichko, Ulas Bagci, Dorit Merhof
Abstract: Vision Transformer (ViT) models have demonstrated breakthroughs in a wide range of computer vision tasks. However, compared to Convolutional Neural Network (CNN) models, ViT models struggle to capture the high-frequency components of images, which limits their ability to detect local textures and edge information. Since abnormalities in human tissue, such as tumors and lesions, vary greatly in structure, texture, and shape, high-frequency information such as texture is crucial for effective semantic segmentation. To address this limitation of ViT models, we propose a new technique, Laplacian-Former, that enhances the self-attention map by adaptively re-calibrating the frequency information in a Laplacian pyramid. More specifically, our method employs a dual attention mechanism combining efficient attention and frequency attention: the efficient attention mechanism reduces the complexity of self-attention to linear while producing the same output, selectively intensifying the contribution of shape and texture features. Furthermore, we introduce a novel efficient enhancement multi-scale bridge that effectively transfers spatial information from the encoder to the decoder while preserving the fundamental features. We demonstrate the efficacy of Laplacian-Former on multi-organ and skin lesion segmentation tasks, with Dice score gains of +1.87% and +0.76% over SOTA approaches, respectively. Our implementation is publicly available at https://github.com/mindflow-institue/Laplacian-Former
Submitted 31 August, 2023; originally announced September 2023.
Comments: Accepted in the main conference MICCAI 2023
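The "efficient attention" the abstract leans on (Shen et al.) avoids the n-by-n attention map by computing a global context matrix K^T V first, making the cost linear in the number of tokens. A minimal sketch, with shapes of my own choosing:

```python
# Sketch of linear-complexity "efficient attention": normalize queries over
# channels and keys over positions, then compute the (D x D) global context
# before applying the queries. Cost is O(N * D^2) instead of O(N^2 * D).
import torch

def efficient_attention(q, k, v):
    """q, k, v: (B, N, D) token embeddings; returns (B, N, D)."""
    q = q.softmax(dim=-1)               # normalize queries over channels
    k = k.softmax(dim=1)                # normalize keys over the N positions
    context = k.transpose(1, 2) @ v     # (B, D, D) global context, linear in N
    return q @ context                  # (B, N, D)

out = efficient_attention(*(torch.randn(2, 196, 64) for _ in range(3)))
print(out.shape)                        # torch.Size([2, 196, 64])
```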
arXiv:2308.03709 [pdf, other] (cs.CV)
Prototype Learning for Out-of-Distribution Polyp Segmentation
Authors: Nikhil Kumar Tomar, Debesh Jha, Ulas Bagci
Abstract: Existing polyp segmentation models built from colonoscopy images often fail to provide reliable segmentation results on datasets from different centers, limiting their applicability. Our objective in this study is to create a robust and well-generalized segmentation model named PrototypeLab that can assist in polyp segmentation. To achieve this, we incorporate various lighting modes, such as white light imaging (WLI), blue light imaging (BLI), linked color imaging (LCI), and flexible spectral imaging color enhancement (FICE), into a new segmentation model that learns to create prototypes for each class of object present in the images. These prototypes represent the characteristic features of the objects, such as their shape, texture, and color. Our model is designed to perform effectively on out-of-distribution (OOD) datasets from multiple centers. We first generate a coarse mask that is used to learn prototypes for the main object class, which are then employed to generate the final segmentation mask. By using prototypes to represent the main class, our approach handles the variability present in medical images and generalizes well to new data, since the prototypes capture the underlying distribution of the data. PrototypeLab offers a promising solution, with a Dice coefficient of ≥ 90% and mIoU of ≥ 85% at near real-time processing speed for polyp segmentation. It achieved superior performance on OOD datasets compared to 16 state-of-the-art image segmentation architectures, potentially improving clinical outcomes. Codes are available at https://github.com/xxxxx/PrototypeLab.
Submitted 7 August, 2023; originally announced August 2023.
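The generic prototype step the abstract describes, pooling features under a (coarse) mask into one vector per class and labeling pixels by similarity, can be sketched briefly. This is the standard masked-average-pooling formulation, not PrototypeLab's full model, and all shapes are illustrative.

```python
# Sketch: build one prototype per class by masked average pooling, then label
# each pixel by cosine similarity to the prototypes. Illustrative only.
import torch
import torch.nn.functional as F

def masked_prototype(feats, mask):
    """feats: (C, H, W) features; mask: (H, W) in {0, 1}. Returns (C,)."""
    w = mask.flatten()                             # (H*W,)
    f = feats.flatten(1)                           # (C, H*W)
    return (f * w).sum(1) / (w.sum() + 1e-8)       # masked average pool

def segment(feats, prototypes):
    """prototypes: (K, C). Returns a per-pixel class map of shape (H, W)."""
    f = F.normalize(feats.flatten(1), dim=0)       # (C, H*W), unit per pixel
    p = F.normalize(prototypes, dim=1)             # (K, C), unit per class
    return (p @ f).argmax(0).reshape(feats.shape[1:])  # cosine-sim argmax

feats = torch.randn(16, 32, 32)                    # backbone features
fg = (torch.rand(32, 32) > 0.5).float()           # coarse foreground mask
protos = torch.stack([masked_prototype(feats, 1 - fg),   # background
                      masked_prototype(feats, fg)])      # polyp
labels = segment(feats, protos)                    # (32, 32) map of {0, 1}
```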
arXiv:2308.00128 [pdf, other] (eess.IV, cs.CV, cs.LG)
Ensemble Learning with Residual Transformer for Brain Tumor Segmentation
Authors: Lanhong Yao, Zheyuan Zhang, Ulas Bagci
Abstract: Brain tumor segmentation is an active research area due to the difficulty of delineating highly complex shaped and textured tumors, as well as the failure of the commonly used U-Net architectures. Combining different neural architectures is a mainstream research direction, particularly combining U-Net with Transformers because of their innate attention mechanism and pixel-wise labeling. Different from previous efforts, this paper proposes a novel network architecture that integrates Transformers into a self-adaptive U-Net to draw out 3D volumetric contexts with reasonable computational costs. We further add a residual connection to prevent degradation of information flow and explore ensemble methods, as the evaluated models have edges on different cases and sub-regions. On the BraTS 2021 dataset (3D), our model achieves an 87.6% mean Dice score and outperforms state-of-the-art methods, demonstrating the potential of combining multiple architectures to optimize brain tumor segmentation.
Submitted 31 July, 2023; originally announced August 2023.
Comments: 9 pages, 4 figures, ISBI 2023
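A bare-bones version of the ensembling step, soft-voting over the per-voxel class probabilities of several segmentation models, might look like the following; the member models and the equal weighting are placeholders, and the paper's ensemble selection is more involved.

```python
# Minimal soft-voting ensemble for volumetric segmentation: average the
# per-voxel class probabilities of the member models, then take the argmax.
import torch

def ensemble_predict(models, volume):
    """volume: (B, C_in, D, H, W); returns per-voxel class labels."""
    probs = torch.stack([m(volume).softmax(dim=1) for m in models])
    return probs.mean(dim=0).argmax(dim=1)   # (B, D, H, W) label map

models = [torch.nn.Conv3d(1, 4, 3, padding=1) for _ in range(3)]  # toy members
labels = ensemble_predict(models, torch.randn(1, 1, 8, 16, 16))
```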
arXiv:2307.16262 [pdf, other] (eess.IV, cs.CV)
Validating polyp and instrument segmentation methods in colonoscopy through Medico 2020 and MedAI 2021 Challenges
Authors: Debesh Jha, Vanshali Sharma, Debapriya Banik, Debayan Bhattacharya, Kaushiki Roy, Steven A. Hicks, Nikhil Kumar Tomar, Vajira Thambawita, Adrian Krenzer, Ge-Peng Ji, Sahadev Poudel, George Batchkala, Saruar Alam, Awadelrahman M. A. Ahmed, Quoc-Huy Trinh, Zeshan Khan, Tien-Phat Nguyen, Shruti Shrestha, Sabari Nathan, Jeonghwan Gwak, Ritika K. Jha, Zheyuan Zhang, Alexander Schlaefer, Debotosh Bhattacharjee, M. K. Bhuyan, et al. (8 additional authors not shown)
Abstract: Automatic analysis of colonoscopy images has been an active field of research motivated by the importance of early detection of precancerous polyps. However, detecting polyps during a live examination can be challenging due to factors such as variation of skill and experience among endoscopists, lack of attentiveness, and fatigue, leading to a high polyp miss-rate. Deep learning has emerged as a promising solution to this challenge, as it can assist endoscopists in detecting and classifying overlooked polyps and abnormalities in real time. Beyond an algorithm's accuracy, transparency and interpretability are crucial for explaining the whys and hows of its predictions. Further, most algorithms are developed on private data, with closed-source or proprietary software, and lack reproducibility. To promote the development of efficient and transparent methods, we organized the "Medico automatic polyp segmentation (Medico 2020)" and "MedAI: Transparency in Medical Image Segmentation (MedAI 2021)" competitions. We present a comprehensive summary, analyze each contribution, highlight the strengths of the best-performing methods, and discuss the possibility of translating such methods into the clinic. For the transparency task, a multi-disciplinary team, including expert gastroenterologists, assessed each submission and evaluated the teams on open-source practices, failure case analysis, ablation studies, and the usability and understandability of their evaluations, to gain a deeper understanding of the models' credibility for clinical deployment. Through this comprehensive analysis, we not only highlight the advancements in polyp and surgical instrument segmentation but also encourage qualitative evaluation for building more transparent and understandable AI-based colonoscopy systems.
Submitted 6 May, 2024; v1 submitted 30 July, 2023; originally announced July 2023.
arXiv:2307.08140 [pdf, other] (eess.IV, cs.CV)
GastroVision: A Multi-class Endoscopy Image Dataset for Computer Aided Gastrointestinal Disease Detection
Authors: Debesh Jha, Vanshali Sharma, Neethi Dasu, Nikhil Kumar Tomar, Steven Hicks, M. K. Bhuyan, Pradip K. Das, Michael A. Riegler, Pål Halvorsen, Ulas Bagci, Thomas de Lange
Abstract: Integrating real-time artificial intelligence (AI) systems into clinical practice faces challenges such as scalability and acceptance. These challenges include data availability, biased outcomes, data quality, lack of transparency, and underperformance on unseen datasets from different distributions. The scarcity of large-scale, precisely labeled, and diverse datasets is the major challenge for clinical integration, due in part to legal restrictions and the extensive manual effort required for accurate annotation by clinicians. To address these challenges, we present GastroVision, a multi-center open-access gastrointestinal (GI) endoscopy dataset that includes different anatomical landmarks, pathological abnormalities, polyp removal cases, and normal findings (a total of 27 classes) from the GI tract. The dataset comprises 8,000 images acquired from Bærum Hospital in Norway and Karolinska University Hospital in Sweden, annotated and verified by experienced GI endoscopists. Furthermore, we validate the significance of our dataset with extensive benchmarking of popular deep-learning baseline models. We believe our dataset can facilitate the development of AI-based algorithms for GI disease detection and classification. Our dataset is available at https://osf.io/84e7f/.
Submitted 17 August, 2023; v1 submitted 16 July, 2023; originally announced July 2023.
arXiv:2307.02984 [pdf, other] (cs.LG, cs.AI, cs.CV)
A Privacy-Preserving Walk in the Latent Space of Generative Models for Medical Applications
Authors: Matteo Pennisi, Federica Proietto Salanitri, Giovanni Bellitto, Simone Palazzo, Ulas Bagci, Concetto Spampinato
Abstract: Generative Adversarial Networks (GANs) have demonstrated their ability to generate synthetic samples that match a target distribution. However, from a privacy perspective, using GANs as a proxy for data sharing is not a safe solution, as they tend to embed near-duplicates of real samples in the latent space. Recent works, inspired by k-anonymity principles, address this issue through sample aggregation in the latent space, with the drawback of reducing the dataset by a factor of k. Our work aims to mitigate this problem by proposing a latent space navigation strategy able to generate diverse synthetic samples that may support effective training of deep models, while addressing privacy concerns in a principled way. Our approach leverages an auxiliary identity classifier as a guide to non-linearly walk between points in the latent space, minimizing the risk of collision with near-duplicates of real samples. We empirically demonstrate that, given any random pair of points in the latent space, our walking strategy is safer than linear interpolation. We then test our path-finding strategy combined with k-same methods and demonstrate, on two benchmarks for tuberculosis and diabetic retinopathy classification, that training a model using samples generated by our approach mitigates drops in performance while preserving privacy.
Submitted 6 July, 2023; originally announced July 2023.
Comments: Accepted at MICCAI 2023
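As a heavily hedged sketch of the underlying idea: the snippet below implements only the linear-interpolation baseline the abstract argues against, plus a rejection test driven by an auxiliary identity classifier. The authors' actual non-linear walking strategy is not specified in this abstract, so everything here (function names, the confidence threshold, the toy classifier) is an assumption for illustration.

```python
# Baseline latent walk with an identity-classifier guard: interpolate between
# two latents and skip points the classifier recognizes too confidently, i.e.
# points likely to be near-duplicates of a real training sample.
import torch

def safe_linear_walk(z0, z1, identity_clf, steps=10, max_conf=0.9):
    """Yield interpolated latents that the identity classifier does not
    attribute to any single real identity with high confidence."""
    for t in torch.linspace(0, 1, steps):
        z = (1 - t) * z0 + t * z1
        conf = identity_clf(z.unsqueeze(0)).softmax(-1).max()
        if conf < max_conf:            # keep only "anonymous enough" points
            yield z

clf = torch.nn.Linear(128, 50)         # toy classifier over 50 identities
zs = list(safe_linear_walk(torch.randn(128), torch.randn(128), clf))
```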
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at MICCAI 2023</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Bagci%2C+U&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Bagci%2C+U&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Bagci%2C+U&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Bagci%2C+U&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 