Search | arXiv e-print repository

Showing 1–50 of 109 results for author: Kainz, B

Searching in archive cs. Results sorted by announcement date (newest first), 50 per page.
1. arXiv:2411.04956 [pdf, other] (cs.CV, cs.AI)
   Uncovering Hidden Subspaces in Video Diffusion Models Using Re-Identification
   Authors: Mischa Dombrowski, Hadrien Reynaud, Bernhard Kainz
   Abstract: Latent Video Diffusion Models can easily deceive casual observers and domain experts alike thanks to the produced image quality and temporal consistency. Beyond entertainment, this creates opportunities around safe data sharing of fully synthetic datasets, which are crucial in healthcare, as well as other domains relying on sensitive personal information. However, privacy concerns with this approach have not fully been addressed yet, and models trained on synthetic data for specific downstream tasks still perform worse than those trained on real data. This discrepancy may be partly due to the sampling space being a subspace of the training videos, effectively reducing the training data size for downstream models. Additionally, the reduced temporal consistency when generating long videos could be a contributing factor. In this paper, we first show that training privacy-preserving models in latent space is computationally more efficient and generalizes better. Furthermore, to investigate downstream degradation factors, we propose to use a re-identification model, previously employed as a privacy preservation filter. We demonstrate that it is sufficient to train this model on the latent space of the video generator. Subsequently, we use these models to evaluate the subspace covered by synthetic video datasets and thus introduce a new way to measure the faithfulness of generative machine learning models. We focus on a specific application in healthcare echocardiography to illustrate the effectiveness of our novel methods. Our findings indicate that only up to 30.8% of the training videos are learned in latent video diffusion models, which could explain the lack of performance when training downstream tasks on synthetic data.
   Submitted 7 November, 2024; originally announced November 2024.
   Comments: 8 pages, 5 tables, 6 figures
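The subspace-coverage idea in this entry can be pictured as a nearest-neighbour test: a training video counts as "learned" if some synthetic video lands close to it in a re-identification embedding space. Below is a minimal sketch of such a metric, assuming a pretrained re-ID encoder that outputs L2-normalised features; the threshold and function names are illustrative, not the authors' code.

```python
# Illustrative sketch (not the paper's implementation): estimating what
# fraction of real training videos a synthetic dataset covers, given
# re-identification features for both sets.
import numpy as np

def coverage(train_feats: np.ndarray, synth_feats: np.ndarray, tau: float = 0.9) -> float:
    """train_feats: (N, d) L2-normalised re-ID features of real videos.
    synth_feats: (M, d) L2-normalised re-ID features of synthetic videos.
    tau: assumed cosine-similarity threshold for declaring a re-ID match.
    Returns the fraction of real videos matched by >= 1 synthetic video."""
    sims = train_feats @ synth_feats.T        # (N, M) cosine similarities
    matched = sims.max(axis=1) >= tau         # best synthetic match per real video
    return float(matched.mean())              # e.g. 0.308 ~ the 30.8% reported above
```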
2. arXiv:2410.05322 [pdf, other] (cs.CV)
   Noise Crystallization and Liquid Noise: Zero-shot Video Generation using Image Diffusion Models
   Authors: Muhammad Haaris Khan, Hadrien Reynaud, Bernhard Kainz
   Abstract: Although powerful for image generation, consistent and controllable video is a longstanding problem for diffusion models. Video models require extensive training and computational resources, leading to high costs and large environmental impacts. Moreover, video models currently offer limited control of the output motion. This paper introduces a novel approach to video generation by augmenting image diffusion models to create sequential animation frames while maintaining fine detail. These techniques can be applied to existing image models without training any video parameters (zero-shot) by altering the input noise in a latent diffusion model. Two complementary methods are presented. Noise crystallization ensures consistency but is limited to large movements due to reduced latent embedding sizes. Liquid noise trades consistency for greater flexibility without resolution limitations. The core concepts also allow other applications such as relighting, seamless upscaling, and improved video style transfer. Furthermore, an exploration of the VAE embedding used for latent diffusion models is performed, resulting in interesting theoretical insights such as a method for human-interpretable latent spaces.
   Submitted 5 October, 2024; originally announced October 2024.

3. arXiv:2410.01064 [pdf, other] (cs.AI)
   Truth or Deceit? A Bayesian Decoding Game Enhances Consistency and Reliability
   Authors: Weitong Zhang, Chengqi Zang, Bernhard Kainz
   Abstract: Large Language Models (LLMs) often produce outputs that, though plausible, can lack consistency and reliability, particularly in ambiguous or complex scenarios. Challenges arise from ensuring that outputs align with both factual correctness and human intent. This is problematic in existing approaches that trade improved consistency for lower accuracy. To mitigate these challenges, we propose a novel game-theoretic approach to enhance consistency and reliability during the decoding stage of LLM output generation. Our method models the decoding process as a multistage Bayesian decoding game. This ensures consistency through Correctness Alignment and enhances reliability via Ambiguity Calibration. The model dynamically converges to a consensus on the most reliable outputs and distinguishes {Valid, Specious} outputs without human feedback or additional training. Our game design allows smaller models to outperform much larger models through game mechanisms (e.g., 78.1 LLaMA13B vs 76.6 PaLM540B), as well as integrating various LLM strategies and models, demonstrating the potential of game-theoretic tools to improve the truthfulness and reliability of LLMs.
   Submitted 1 October, 2024; originally announced October 2024.

4. arXiv:2409.17800 [pdf, other] (cs.HC, eess.IV)
   Bias Assessment and Data Drift Detection in Medical Image Analysis: A Survey
   Authors: Andrea Prenner, Bernhard Kainz
   Abstract: Machine Learning (ML) models have gained popularity in medical imaging analysis given their expert-level performance in many medical domains. To enhance the trustworthiness, acceptance, and regulatory compliance of medical imaging models and to facilitate their integration into clinical settings, we review and categorise methods for ensuring ML reliability, both during development and throughout the model's lifespan. Specifically, we provide an overview of methods assessing models' inner workings regarding bias encoding and detection of data drift for disease classification models. Additionally, to evaluate the severity in case of a significant drift, we provide an overview of the methods developed for classifier accuracy estimation in case of no access to ground truth labels. This should enable practitioners to implement methods ensuring reliable ML deployment and consistent prediction performance over time.
   Submitted 26 September, 2024; originally announced September 2024.

5. arXiv:2409.14149 [pdf, other] (cs.CV)
   JVID: Joint Video-Image Diffusion for Visual-Quality and Temporal-Consistency in Video Generation
   Authors: Hadrien Reynaud, Matthew Baugh, Mischa Dombrowski, Sarah Cechnicka, Qingjie Meng, Bernhard Kainz
   Abstract: We introduce the Joint Video-Image Diffusion model (JVID), a novel approach to generating high-quality and temporally coherent videos. We achieve this by integrating two diffusion models: a Latent Image Diffusion Model (LIDM) trained on images and a Latent Video Diffusion Model (LVDM) trained on video data. Our method combines these models in the reverse diffusion process, where the LIDM enhances image quality and the LVDM ensures temporal consistency. This unique combination allows us to effectively handle the complex spatio-temporal dynamics in video generation. Our results demonstrate quantitative and qualitative improvements in producing realistic and coherent videos.
   Submitted 27 September, 2024; v1 submitted 21 September, 2024; originally announced September 2024.
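The mechanism JVID describes, two pretrained denoisers taking turns inside one reverse-diffusion loop, can be sketched compactly. In the toy version below, `lidm` and `lvdm` are assumed epsilon-predictors over compatible latents, and `scheduler` is an assumed DDPM-style helper with `timesteps` and a `step(eps, t, x)` update; the random per-step choice is a stand-in, not the paper's actual schedule.

```python
# Toy sketch of joint video-image reverse diffusion (assumed interfaces).
import torch

@torch.no_grad()
def sample_jvid_style(lidm, lvdm, scheduler, shape=(1, 16, 4, 32, 32), p_video=0.5):
    x = torch.randn(shape)                        # (B, T, C, H, W) latent video
    for t in scheduler.timesteps:
        if torch.rand(()) < p_video:
            eps = lvdm(x, t)                      # video model: temporal coherence
        else:
            b, T, c, h, w = x.shape
            frames = x.reshape(b * T, c, h, w)    # image model applied frame-wise
            eps = lidm(frames, t).reshape(b, T, c, h, w)
        x = scheduler.step(eps, t, x)             # one reverse-diffusion update
    return x
```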
6. arXiv:2409.09796 [pdf, other] (eess.IV, cs.CV)
   Universal Topology Refinement for Medical Image Segmentation with Polynomial Feature Synthesis
   Authors: Liu Li, Hanchun Wang, Matthew Baugh, Qiang Ma, Weitong Zhang, Cheng Ouyang, Daniel Rueckert, Bernhard Kainz
   Abstract: Although existing medical image segmentation methods provide impressive pixel-wise accuracy, they often neglect topological correctness, making their segmentations unusable for many downstream tasks. One option is to retrain such models whilst including a topology-driven loss component. However, this is computationally expensive and often impractical. A better solution would be to have a versatile plug-and-play topology refinement method that is compatible with any domain-specific segmentation pipeline. Directly training a post-processing model to mitigate topological errors often fails as such models tend to be biased towards the topological errors of a target segmentation network. The diversity of these errors is confined to the information provided by a labelled training set, which is especially problematic for small datasets. Our method solves this problem by training a model-agnostic topology refinement network with synthetic segmentations that cover a wide variety of topological errors. Inspired by the Stone-Weierstrass theorem, we synthesize topology-perturbation masks with randomly sampled coefficients of orthogonal polynomial bases, which ensures a complete and unbiased representation. Practically, we verified the efficiency and effectiveness of our methods as being compatible with multiple families of polynomial bases, and show evidence that our universal plug-and-play topology refinement network outperforms both existing topology-driven learning-based and post-processing methods. We also show that combining our method with learning-based models provides an effortless add-on, which can further improve the performance of existing approaches.
   Submitted 15 September, 2024; originally announced September 2024.
   Comments: Accepted by the 27th International Conference on Medical Image Computing and Computer Assisted Intervention (MICCAI 2024)
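Since any continuous perturbation field can be approximated by polynomial combinations (the Stone-Weierstrass point above), one way to picture the mask synthesis is to threshold a random polynomial surface. The sketch below uses a separable Chebyshev basis; the degree, sampling, and thresholding rule are arbitrary illustrative choices, not the paper's recipe.

```python
# Illustrative sketch: topology-perturbation masks from randomly weighted
# orthogonal (Chebyshev) polynomial bases.
import numpy as np
from numpy.polynomial import chebyshev as C

def random_poly_mask(size=128, max_degree=8, seed=None):
    rng = np.random.default_rng(seed)
    xs = np.linspace(-1.0, 1.0, size)
    # Random 1-D Chebyshev series along each axis, combined separably.
    px = C.chebval(xs, rng.standard_normal(max_degree + 1))
    qy = C.chebval(xs, rng.standard_normal(max_degree + 1))
    field = np.outer(px, qy)                 # smooth random 2-D surface
    return field > field.mean()              # threshold into a binary mask

# XOR-ing such masks into clean label maps yields synthetic topological
# errors (holes, bridges) to train a refinement network on:
# corrupted = np.logical_xor(clean_mask, random_poly_mask(clean_mask.shape[0]))
```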
7. arXiv:2409.03929 [pdf, other] (cs.CV)
   Data-Efficient Generation for Dataset Distillation
   Authors: Zhe Li, Weitong Zhang, Sarah Cechnicka, Bernhard Kainz
   Abstract: While deep learning techniques have proven successful in image-related tasks, the exponentially increased data storage and computation costs become a significant challenge. Dataset distillation addresses these challenges by synthesizing only a few images for each class that encapsulate all essential information. Most current methods focus on matching. The problems lie in the synthetic images not being human-readable and the dataset performance being insufficient for downstream learning tasks. Moreover, the distillation time can quickly get out of bounds when the number of synthetic images per class increases even slightly. To address this, we train a class-conditional latent diffusion model capable of generating realistic synthetic images with labels. The sampling time can be reduced to several tens of images per second. We demonstrate that models can be effectively trained using only a small set of synthetic images and evaluated on a large real test set. Our approach achieved rank 1 in The First Dataset Distillation Challenge at ECCV 2024 on the CIFAR100 and TinyImageNet datasets.
   Submitted 5 September, 2024; originally announced September 2024.
   Comments: 13 pages, 7 figures

8. arXiv:2407.13277 [pdf, other] (eess.IV, cs.CV)
   URCDM: Ultra-Resolution Image Synthesis in Histopathology
   Authors: Sarah Cechnicka, James Ball, Matthew Baugh, Hadrien Reynaud, Naomi Simmonds, Andrew P. T. Smith, Catherine Horsfield, Candice Roufosse, Bernhard Kainz
   Abstract: Diagnosing medical conditions from histopathology data requires a thorough analysis across the various resolutions of Whole Slide Images (WSI). However, existing generative methods fail to consistently represent the hierarchical structure of WSIs due to a focus on high-fidelity patches. To tackle this, we propose Ultra-Resolution Cascaded Diffusion Models (URCDMs) which are capable of synthesising entire histopathology images at high resolutions whilst authentically capturing the details of both the underlying anatomy and pathology at all magnification levels. We evaluate our method on three separate datasets, consisting of brain, breast and kidney tissue, and surpass existing state-of-the-art multi-resolution models. Furthermore, an expert evaluation study was conducted, demonstrating that URCDMs consistently generate outputs across various resolutions that trained evaluators cannot distinguish from real images. All code and additional examples can be found on GitHub.
   Submitted 18 July, 2024; originally announced July 2024.
   Comments: arXiv admin note: text overlap with arXiv:2312.01152

9. arXiv:2407.06635 [pdf, other] (cs.CV, stat.ML)
   Ensembled Cold-Diffusion Restorations for Unsupervised Anomaly Detection
   Authors: Sergio Naval Marimont, Vasilis Siomos, Matthew Baugh, Christos Tzelepis, Bernhard Kainz, Giacomo Tarroni
   Abstract: Unsupervised Anomaly Detection (UAD) methods aim to identify anomalies in test samples by comparing them with a normative distribution learned from a dataset known to be anomaly-free. Approaches based on generative models offer interpretability by generating anomaly-free versions of test images, but are typically unable to identify subtle anomalies. Alternatively, approaches using feature modelling or self-supervised methods, such as the ones relying on synthetically generated anomalies, do not provide out-of-the-box interpretability. In this work, we present a novel method that combines the strengths of both strategies: a generative cold-diffusion pipeline (i.e., a diffusion-like pipeline which uses corruptions not based on noise) that is trained with the objective of turning synthetically-corrupted images back to their normal, original appearance. To support our pipeline we introduce a novel synthetic anomaly generation procedure, called DAG, and a novel anomaly score which ensembles restorations conditioned with different degrees of abnormality. Our method surpasses the prior state of the art for unsupervised anomaly detection in three different brain MRI datasets.
   Submitted 9 July, 2024; originally announced July 2024.
   Comments: 8 pages, 3 figures. MICCAI 2024
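The ensembled anomaly score described in this entry can be read as: corrupt the test image at several severities, restore each corruption with the trained network, and average the restoration residuals. A schematic sketch, where `corrupt` and `restorer` are assumed stand-ins for the paper's noise-free corruption and restoration model:

```python
# Schematic sketch of an ensembled cold-diffusion restoration score.
import torch

@torch.no_grad()
def anomaly_map(x, restorer, corrupt, severities=(0.2, 0.4, 0.6, 0.8)):
    """x: (B, C, H, W) test images; returns a per-pixel anomaly map."""
    residuals = []
    for s in severities:
        x_corr = corrupt(x, severity=s)           # synthetic, noise-free corruption
        x_rest = restorer(x_corr, severity=s)     # trained to undo the corruption
        residuals.append((x - x_rest).abs())      # restoration error at severity s
    return torch.stack(residuals).mean(dim=0)     # ensemble over severities
```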
10. arXiv:2406.14038 [pdf, other] (cs.CV, cs.AI)
   Resource-efficient Medical Image Analysis with Self-adapting Forward-Forward Networks
   Authors: Johanna P. Müller, Bernhard Kainz
   Abstract: We introduce a fast Self-adapting Forward-Forward Network (SaFF-Net) for medical imaging analysis, mitigating power consumption and resource limitations, which currently primarily stem from the prevalent reliance on back-propagation for model training and fine-tuning. Building upon the recently proposed Forward-Forward Algorithm (FFA), we introduce the Convolutional Forward-Forward Algorithm (CFFA), a parameter-efficient reformulation that is suitable for advanced image analysis and overcomes the speed and generalisation constraints of the original FFA. To address the hyper-parameter sensitivity of FFAs, we also introduce a self-adapting framework, SaFF-Net, which fine-tunes parameters during warm-up and training in parallel. Our approach enables more effective model training and eliminates the previously essential requirement for an arbitrarily chosen Goodness function in FFA. We evaluate our approach on several benchmarking datasets in comparison with standard Back-Propagation (BP) neural networks, showing that FFA-based networks with notably fewer parameters and function evaluations can compete with standard models, especially in one-shot scenarios and large batch sizes. The code will be available at the time of the conference.
   Submitted 17 July, 2024; v1 submitted 20 June, 2024; originally announced June 2024.
   Comments: Accepted for MICCAI Workshop MLMI 2024
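For context on the Forward-Forward Algorithm this entry builds on: each layer is trained locally, with no backward pass through the network, to give high "goodness" (commonly the sum of squared activations) to positive samples and low goodness to negative ones. A minimal single-layer sketch of that generic baseline follows; hyper-parameters are arbitrary, and this is not SaFF-Net or the CFFA itself.

```python
# Minimal sketch of one Forward-Forward layer (generic FFA baseline).
import torch
import torch.nn.functional as F

class FFLayer(torch.nn.Module):
    def __init__(self, d_in, d_out, theta=2.0, lr=1e-3):
        super().__init__()
        self.lin = torch.nn.Linear(d_in, d_out)
        self.theta = theta                               # goodness threshold
        self.opt = torch.optim.Adam(self.parameters(), lr=lr)

    def forward(self, x):
        # Normalising the input hides the previous layer's goodness.
        return F.relu(self.lin(F.normalize(x, dim=1)))

    def train_step(self, x_pos, x_neg):
        g_pos = self.forward(x_pos).pow(2).sum(dim=1)    # goodness of positives
        g_neg = self.forward(x_neg).pow(2).sum(dim=1)    # goodness of negatives
        # Push positive goodness above theta and negative goodness below it.
        loss = (F.softplus(self.theta - g_pos) + F.softplus(g_neg - self.theta)).mean()
        self.opt.zero_grad(); loss.backward(); self.opt.step()
        # Detach outputs so the next layer trains purely locally.
        return self.forward(x_pos).detach(), self.forward(x_neg).detach()
```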
Mapping of often incomplete or degraded data to parameters is ill-posed, thus data-driven iterative solutions are required, for example when reconstructing clean images from poor signals. Diffusion models have shown promise as potent generative tools for solving inverse problems due to their superior reconstruction quality and their compatibility with iterative solvers. However, most existing approaches are limited to linear inverse problems represented as Stochastic Differential Equations (SDEs). This simplification falls short of addressing the challenging nature of real-world problems, leading to amplified cumulative errors and biases. We provide an explanation for this gap through the lens of measure-preserving dynamics of Random Dynamical Systems (RDS) with which we analyse Temporal Distribution Discrepancy and thus introduce a theoretical framework based on RDS for SDE diffusion models. We uncover several strategies that inherently enhance the stability and generalizability of diffusion models for inverse problems and introduce a novel score-based diffusion framework, the \textbf{D}ynamics-aware S\textbf{D}E \textbf{D}iffusion \textbf{G}enerative \textbf{M}odel (D$^3$GM). The \textit{Measure-preserving property} can return the degraded measurement to the original state despite complex degradation with the RDS concept of \textit{stability}. Our extensive experimental results corroborate the effectiveness of D$^3$GM across multiple benchmarks including a prominent application for inverse problems, magnetic resonance imaging. Code and data will be publicly available. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.13652v1-abstract-full').style.display = 'none'; document.getElementById('2406.13652v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.13536">arXiv:2406.13536</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.13536">pdf</a>, <a href="https://arxiv.org/format/2406.13536">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Image Distillation for Safe Data Sharing in Histopathology </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zhe Li</a>, <a href="/search/cs?searchtype=author&amp;query=Kainz%2C+B">Bernhard Kainz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.13536v3-abstract-short" style="display: inline;"> Histopathology can help clinicians make accurate diagnoses, determine disease prognosis, and plan appropriate treatment strategies. As deep learning techniques prove successful in the medical domain, the primary challenges become limited data availability and concerns about data sharing and privacy. 
Federated learning has addressed this challenge by training models locally and updating parameters&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.13536v3-abstract-full').style.display = 'inline'; document.getElementById('2406.13536v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.13536v3-abstract-full" style="display: none;"> Histopathology can help clinicians make accurate diagnoses, determine disease prognosis, and plan appropriate treatment strategies. As deep learning techniques prove successful in the medical domain, the primary challenges become limited data availability and concerns about data sharing and privacy. Federated learning has addressed this challenge by training models locally and updating parameters on a server. However, issues, such as domain shift and bias, persist and impact overall performance. Dataset distillation presents an alternative approach to overcoming these challenges. It involves creating a small synthetic dataset that encapsulates essential information, which can be shared without constraints. At present, this paradigm is not practicable as current distillation approaches only generate non human readable representations and exhibit insufficient performance for downstream learning tasks. We train a latent diffusion model and construct a new distilled synthetic dataset with a small number of human readable synthetic images. Selection of maximally informative synthetic images is done via graph community analysis of the representation space. We compare downstream classification models trained on our synthetic distillation data to models trained on real data and reach performances suitable for practical application. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.13536v3-abstract-full').style.display = 'none'; document.getElementById('2406.13536v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted at MICCAI 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.00808">arXiv:2406.00808</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.00808">pdf</a>, <a href="https://arxiv.org/format/2406.00808">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> EchoNet-Synthetic: Privacy-preserving Video Generation for Safe Medical Data Sharing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Reynaud%2C+H">Hadrien Reynaud</a>, <a href="/search/cs?searchtype=author&amp;query=Meng%2C+Q">Qingjie Meng</a>, <a href="/search/cs?searchtype=author&amp;query=Dombrowski%2C+M">Mischa Dombrowski</a>, <a href="/search/cs?searchtype=author&amp;query=Ghosh%2C+A">Arijit Ghosh</a>, <a href="/search/cs?searchtype=author&amp;query=Day%2C+T">Thomas Day</a>, <a href="/search/cs?searchtype=author&amp;query=Gomez%2C+A">Alberto Gomez</a>, <a href="/search/cs?searchtype=author&amp;query=Leeson%2C+P">Paul Leeson</a>, <a href="/search/cs?searchtype=author&amp;query=Kainz%2C+B">Bernhard Kainz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.00808v1-abstract-short" style="display: inline;"> To make medical datasets accessible without sharing sensitive patient information, we introduce a novel end-to-end approach for generative de-identification of dynamic medical imaging data. Until now, generative methods have faced constraints in terms of fidelity, spatio-temporal coherence, and the length of generation, failing to capture the complete details of dataset distributions. We present a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.00808v1-abstract-full').style.display = 'inline'; document.getElementById('2406.00808v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.00808v1-abstract-full" style="display: none;"> To make medical datasets accessible without sharing sensitive patient information, we introduce a novel end-to-end approach for generative de-identification of dynamic medical imaging data. Until now, generative methods have faced constraints in terms of fidelity, spatio-temporal coherence, and the length of generation, failing to capture the complete details of dataset distributions. We present a model designed to produce high-fidelity, long and complete data samples with near-real-time efficiency and explore our approach on a challenging task: generating echocardiogram videos. We develop our generation method based on diffusion models and introduce a protocol for medical video dataset anonymization. As an exemplar, we present EchoNet-Synthetic, a fully synthetic, privacy-compliant echocardiogram dataset with paired ejection fraction labels. As part of our de-identification protocol, we evaluate the quality of the generated dataset and propose to use clinical downstream tasks as a measurement on top of widely used but potentially biased image quality metrics. 
arXiv:2403.16776 [pdf, other] (eess.IV, cs.CV, cs.LG)
Diff-Def: Diffusion-Generated Deformation Fields for Conditional Atlases
Authors: Sophie Starck, Vasiliki Sideri-Lampretsa, Bernhard Kainz, Martin Menten, Tamara Mueller, Daniel Rueckert
Abstract: Anatomical atlases are widely used for population analysis. Conditional atlases target a particular sub-population defined via certain conditions (e.g. demographics or pathologies) and allow for the investigation of fine-grained anatomical differences, such as morphological changes correlated with age. Existing approaches use either registration-based methods that are unable to handle large anatomical variations or generative models, which can suffer from training instabilities and hallucinations. To overcome these limitations, we use latent diffusion models to generate deformation fields, which transform a general population atlas into one representing a specific sub-population. By generating a deformation field and registering the conditional atlas to a neighbourhood of images, we ensure structural plausibility and avoid hallucinations, which can occur during direct image synthesis. We compare our method to several state-of-the-art atlas generation methods in experiments using 5000 brain as well as whole-body MR images from UK Biobank. Our method generates highly realistic atlases with smooth transformations and high anatomical fidelity, outperforming the baselines.
Submitted 25 March, 2024; originally announced March 2024.
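Applying a generated deformation field to an atlas is standard resampling. A minimal 2-D sketch using scipy follows; the warp_image helper and the constant test field are illustrative, not the paper's implementation:

```python
import numpy as np
from scipy.ndimage import map_coordinates

def warp_image(image, displacement):
    """Warp a 2-D image by a dense displacement field.

    displacement has shape (2, H, W): per-pixel offsets (dy, dx) that are
    added to the identity grid before resampling with linear interpolation.
    """
    h, w = image.shape
    grid_y, grid_x = np.meshgrid(np.arange(h), np.arange(w), indexing="ij")
    coords = np.stack([grid_y + displacement[0], grid_x + displacement[1]])
    return map_coordinates(image, coords, order=1, mode="nearest")

# e.g. a synthetic field that samples every pixel from one row below
atlas = np.random.rand(64, 64)
field = np.stack([np.ones((64, 64)), np.zeros((64, 64))])
warped = warp_image(atlas, field)
```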
arXiv:2403.14429 [pdf, other] (cs.CV, cs.AI, cs.LG)
Style-Extracting Diffusion Models for Semi-Supervised Histopathology Segmentation
Authors: Mathias Öttl, Frauke Wilm, Jana Steenpass, Jingna Qiu, Matthias Rübner, Arndt Hartmann, Matthias Beckmann, Peter Fasching, Andreas Maier, Ramona Erber, Bernhard Kainz, Katharina Breininger
style="display: inline;"> Deep learning-based image generation has seen significant advancements with diffusion models, notably improving the quality of generated images. Despite these developments, generating images with unseen characteristics beneficial for downstream tasks has received limited attention. To bridge this gap, we propose Style-Extracting Diffusion Models, featuring two conditioning mechanisms. Specifically&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.14429v1-abstract-full').style.display = 'inline'; document.getElementById('2403.14429v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.14429v1-abstract-full" style="display: none;"> Deep learning-based image generation has seen significant advancements with diffusion models, notably improving the quality of generated images. Despite these developments, generating images with unseen characteristics beneficial for downstream tasks has received limited attention. To bridge this gap, we propose Style-Extracting Diffusion Models, featuring two conditioning mechanisms. Specifically, we utilize 1) a style conditioning mechanism which allows to inject style information of previously unseen images during image generation and 2) a content conditioning which can be targeted to a downstream task, e.g., layout for segmentation. We introduce a trainable style encoder to extract style information from images, and an aggregation block that merges style information from multiple style inputs. This architecture enables the generation of images with unseen styles in a zero-shot manner, by leveraging styles from unseen images, resulting in more diverse generations. In this work, we use the image layout as target condition and first show the capability of our method on a natural image dataset as a proof-of-concept. We further demonstrate its versatility in histopathology, where we combine prior knowledge about tissue composition and unannotated data to create diverse synthetic images with known layouts. This allows us to generate additional synthetic data to train a segmentation network in a semi-supervised fashion. We verify the added value of the generated images by showing improved segmentation results and lower performance variability between patients when synthetic images are included during segmentation training. Our code will be made publicly available at [LINK]. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.14429v1-abstract-full').style.display = 'none'; document.getElementById('2403.14429v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.11641">arXiv:2403.11641</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.11641">pdf</a>, <a href="https://arxiv.org/format/2403.11641">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Arc2Face: A Foundation Model for ID-Consistent Human Faces </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Papantoniou%2C+F+P">Foivos Paraperas Papantoniou</a>, <a href="/search/cs?searchtype=author&amp;query=Lattas%2C+A">Alexandros Lattas</a>, <a href="/search/cs?searchtype=author&amp;query=Moschoglou%2C+S">Stylianos Moschoglou</a>, <a href="/search/cs?searchtype=author&amp;query=Deng%2C+J">Jiankang Deng</a>, <a href="/search/cs?searchtype=author&amp;query=Kainz%2C+B">Bernhard Kainz</a>, <a href="/search/cs?searchtype=author&amp;query=Zafeiriou%2C+S">Stefanos Zafeiriou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.11641v2-abstract-short" style="display: inline;"> This paper presents Arc2Face, an identity-conditioned face foundation model, which, given the ArcFace embedding of a person, can generate diverse photo-realistic images with an unparalleled degree of face similarity than existing models. Despite previous attempts to decode face recognition features into detailed images, we find that common high-resolution datasets (e.g. FFHQ) lack sufficient ident&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.11641v2-abstract-full').style.display = 'inline'; document.getElementById('2403.11641v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.11641v2-abstract-full" style="display: none;"> This paper presents Arc2Face, an identity-conditioned face foundation model, which, given the ArcFace embedding of a person, can generate diverse photo-realistic images with an unparalleled degree of face similarity than existing models. Despite previous attempts to decode face recognition features into detailed images, we find that common high-resolution datasets (e.g. FFHQ) lack sufficient identities to reconstruct any subject. To that end, we meticulously upsample a significant portion of the WebFace42M database, the largest public dataset for face recognition (FR). Arc2Face builds upon a pretrained Stable Diffusion model, yet adapts it to the task of ID-to-face generation, conditioned solely on ID vectors. Deviating from recent works that combine ID with text embeddings for zero-shot personalization of text-to-image models, we emphasize on the compactness of FR features, which can fully capture the essence of the human face, as opposed to hand-crafted prompts. Crucially, text-augmented models struggle to decouple identity and text, usually necessitating some description of the given face to achieve satisfactory similarity. Arc2Face, however, only needs the discriminative features of ArcFace to guide the generation, offering a robust prior for a plethora of tasks where ID consistency is of paramount importance. 
arXiv:2401.01201 [pdf, other] (cs.CV, cs.LG)
Whole-examination AI estimation of fetal biometrics from 20-week ultrasound scans
Authors: Lorenzo Venturini, Samuel Budd, Alfonso Farruggia, Robert Wright, Jacqueline Matthew, Thomas G. Day, Bernhard Kainz, Reza Razavi, Jo V. Hajnal
Abstract: The current approach to fetal anomaly screening is based on biometric measurements derived from individually selected ultrasound images. In this paper, we introduce a paradigm shift that attains human-level performance in biometric measurement by aggregating automatically extracted biometrics from every frame across an entire scan, with no need for operator intervention. We use a convolutional neural network to classify each frame of an ultrasound video recording. We then measure fetal biometrics in every frame where appropriate anatomy is visible. We use a Bayesian method to estimate the true value of each biometric from a large number of measurements and probabilistically reject outliers. We performed a retrospective experiment on 1457 recordings (comprising 48 million frames) of 20-week ultrasound scans, estimated fetal biometrics in those scans and compared our estimates to the measurements sonographers took during the scan. Our method achieves human-level performance in estimating fetal biometrics and estimates well-calibrated credible intervals in which the true biometric value is expected to lie.
Submitted 2 January, 2024; originally announced January 2024.
Comments: 14 pages, 16 figures. Submitted to NPJ digital medicine. For associated video file, see http://wp.doc.ic.ac.uk/ifind/wp-content/uploads/sites/79/2023/12/realtime.gif
ACM Class: I.4.7; J.3
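A Bayesian aggregation with probabilistic outlier rejection, as described, might resemble the following toy estimator: a two-component inlier/outlier mixture fitted with EM. The model choice is an assumption for illustration, not the authors' exact method:

```python
import numpy as np

def robust_estimate(measurements, outlier_rate=0.1, n_iter=50):
    """EM for a two-component model: inliers ~ N(mu, sigma^2), outliers ~
    Uniform over the observed range. Returns the posterior-weighted mean,
    a rough standard error, and per-measurement inlier probabilities."""
    x = np.asarray(measurements, dtype=float)
    lo, hi = x.min(), x.max()
    outlier_density = 1.0 / max(hi - lo, 1e-9)
    mu, sigma, w = np.median(x), x.std() + 1e-9, 1.0 - outlier_rate
    for _ in range(n_iter):
        inlier = w * np.exp(-0.5 * ((x - mu) / sigma) ** 2) \
                   / (sigma * np.sqrt(2 * np.pi))
        outlier = (1.0 - w) * outlier_density
        r = inlier / (inlier + outlier)           # E-step: inlier responsibility
        mu = np.sum(r * x) / np.sum(r)            # M-step: reweighted mean
        sigma = np.sqrt(np.sum(r * (x - mu) ** 2) / np.sum(r)) + 1e-9
        w = r.mean()
    return mu, sigma / np.sqrt(r.sum()), r
```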
arXiv:2312.01152 [pdf, other] (eess.IV, cs.CV)
Ultra-Resolution Cascaded Diffusion Model for Gigapixel Image Synthesis in Histopathology
Authors: Sarah Cechnicka, Hadrien Reynaud, James Ball, Naomi Simmonds, Catherine Horsfield, Andrew Smith, Candice Roufosse, Bernhard Kainz
Abstract: Diagnoses from histopathology images rely on information from both high and low resolutions of Whole Slide Images. Ultra-Resolution Cascaded Diffusion Models (URCDMs) allow for the synthesis of high-resolution images that are realistic at all magnification levels, focusing not only on fidelity but also on long-distance spatial coherency. Our model beats existing methods, improving the pFID-50k score [2] by 110.63, down to 39.52 pFID-50k. Additionally, a human expert evaluation study was performed, reaching a weighted Mean Absolute Error (MAE) of 0.11 for the Lower Resolution Diffusion Models and a weighted MAE of 0.22 for the URCDM.
Submitted 2 December, 2023; originally announced December 2023.
Comments: MedNeurIPS 2023 poster
inline;"> Diagnoses from histopathology images rely on information from both high and low resolutions of Whole Slide Images. Ultra-Resolution Cascaded Diffusion Models (URCDMs) allow for the synthesis of high-resolution images that are realistic at all magnification levels, focusing not only on fidelity but also on long-distance spatial coherency. Our model beats existing methods, improving the pFID-50k [2]&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.01152v1-abstract-full').style.display = 'inline'; document.getElementById('2312.01152v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.01152v1-abstract-full" style="display: none;"> Diagnoses from histopathology images rely on information from both high and low resolutions of Whole Slide Images. Ultra-Resolution Cascaded Diffusion Models (URCDMs) allow for the synthesis of high-resolution images that are realistic at all magnification levels, focusing not only on fidelity but also on long-distance spatial coherency. Our model beats existing methods, improving the pFID-50k [2] score by 110.63 to 39.52 pFID-50k. Additionally, a human expert evaluation study was performed, reaching a weighted Mean Absolute Error (MAE) of 0.11 for the Lower Resolution Diffusion Models and a weighted MAE of 0.22 for the URCDM. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.01152v1-abstract-full').style.display = 'none'; document.getElementById('2312.01152v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">MedNeurIPS 2023 poster</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.18645">arXiv:2311.18645</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.18645">pdf</a>, <a href="https://arxiv.org/format/2311.18645">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Stochastic Vision Transformers with Wasserstein Distance-Aware Attention </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Erick%2C+F+X">Franciskus Xaverius Erick</a>, <a href="/search/cs?searchtype=author&amp;query=Rezaei%2C+M">Mina Rezaei</a>, <a href="/search/cs?searchtype=author&amp;query=M%C3%BCller%2C+J+P">Johanna Paula M眉ller</a>, <a href="/search/cs?searchtype=author&amp;query=Kainz%2C+B">Bernhard Kainz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.18645v1-abstract-short" style="display: inline;"> Self-supervised learning is one of the most promising approaches to acquiring knowledge from limited labeled data. 
Abstract: Self-supervised learning is one of the most promising approaches to acquiring knowledge from limited labeled data. Despite the substantial advancements made in recent years, self-supervised models have posed a challenge to practitioners, as they do not readily provide insight into the model's confidence and uncertainty. Tackling this issue is no simple feat, primarily due to the complexity involved in implementing techniques that can make use of the latent representations learned during pre-training without relying on explicit labels. Motivated by this, we introduce a new stochastic vision transformer that integrates uncertainty and distance awareness into self-supervised learning (SSL) pipelines. Instead of the conventional deterministic vector embedding, our novel stochastic vision transformer encodes image patches into elliptical Gaussian distributional embeddings. Notably, the attention matrices of these stochastic representational embeddings are computed using Wasserstein distance-based attention, effectively capitalizing on the distributional nature of these embeddings. Additionally, we propose a regularization term based on Wasserstein distance for both pre-training and fine-tuning processes, thereby incorporating distance awareness into latent representations. We perform extensive experiments across different tasks such as in-distribution generalization, out-of-distribution detection, dataset corruption, semi-supervised settings, and transfer learning to other datasets and tasks. Our proposed method achieves superior accuracy and calibration, surpassing the self-supervised baseline in a wide range of experiments on a variety of datasets.
Submitted 30 November, 2023; originally announced November 2023.
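The 2-Wasserstein distance between Gaussians with diagonal covariances has a cheap closed form, which is what makes this kind of attention tractable. Below is a minimal sketch; the wasserstein_attention parameterization is a plausible stand-in, not the paper's exact formulation:

```python
import numpy as np

def w2_diag_gaussians(mu1, var1, mu2, var2):
    """Squared 2-Wasserstein distance between diagonal-covariance Gaussians:
    W2^2 = ||mu1 - mu2||^2 + sum_i (sqrt(var1_i) - sqrt(var2_i))^2."""
    return np.sum((mu1 - mu2) ** 2) + np.sum((np.sqrt(var1) - np.sqrt(var2)) ** 2)

def wasserstein_attention(mus, vars_, temperature=1.0):
    """Attention weights from negative pairwise W2^2 distances between
    token embeddings represented as diagonal Gaussians."""
    n = len(mus)
    logits = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            logits[i, j] = -w2_diag_gaussians(mus[i], vars_[i],
                                              mus[j], vars_[j]) / temperature
    weights = np.exp(logits - logits.max(axis=-1, keepdims=True))
    return weights / weights.sum(axis=-1, keepdims=True)
```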
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.15453">arXiv:2311.15453</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.15453">pdf</a>, <a href="https://arxiv.org/format/2311.15453">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> DISYRE: Diffusion-Inspired SYnthetic REstoration for Unsupervised Anomaly Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Marimont%2C+S+N">Sergio Naval Marimont</a>, <a href="/search/cs?searchtype=author&amp;query=Baugh%2C+M">Matthew Baugh</a>, <a href="/search/cs?searchtype=author&amp;query=Siomos%2C+V">Vasilis Siomos</a>, <a href="/search/cs?searchtype=author&amp;query=Tzelepis%2C+C">Christos Tzelepis</a>, <a href="/search/cs?searchtype=author&amp;query=Kainz%2C+B">Bernhard Kainz</a>, <a href="/search/cs?searchtype=author&amp;query=Tarroni%2C+G">Giacomo Tarroni</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.15453v2-abstract-short" style="display: inline;"> Unsupervised Anomaly Detection (UAD) techniques aim to identify and localize anomalies without relying on annotations, only leveraging a model trained on a dataset known to be free of anomalies. Diffusion models learn to modify inputs $x$ to increase the probability of it belonging to a desired distribution, i.e., they model the score function $\nabla_x \log p(x)$. Such a score function is potenti&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.15453v2-abstract-full').style.display = 'inline'; document.getElementById('2311.15453v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.15453v2-abstract-full" style="display: none;"> Unsupervised Anomaly Detection (UAD) techniques aim to identify and localize anomalies without relying on annotations, only leveraging a model trained on a dataset known to be free of anomalies. Diffusion models learn to modify inputs $x$ to increase the probability of it belonging to a desired distribution, i.e., they model the score function $\nabla_x \log p(x)$. Such a score function is potentially relevant for UAD, since $\nabla_x \log p(x)$ is itself a pixel-wise anomaly score. However, diffusion models are trained to invert a corruption process based on Gaussian noise and the learned score function is unlikely to generalize to medical anomalies. This work addresses the problem of how to learn a score function relevant for UAD and proposes DISYRE: Diffusion-Inspired SYnthetic REstoration. We retain the diffusion-like pipeline but replace the Gaussian noise corruption with a gradual, synthetic anomaly corruption so the learned score function generalizes to medical, naturally occurring anomalies. We evaluate DISYRE on three common Brain MRI UAD benchmarks and substantially outperform other methods in two out of the three tasks. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.15453v2-abstract-full').style.display = 'none'; document.getElementById('2311.15453v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 3 figures. Accepted for publication in ISBI 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.01567">arXiv:2311.01567</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.01567">pdf</a>, <a href="https://arxiv.org/format/2311.01567">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Exploring the Hyperparameter Space of Image Diffusion Models for Echocardiogram Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Reynaud%2C+H">Hadrien Reynaud</a>, <a href="/search/cs?searchtype=author&amp;query=Kainz%2C+B">Bernhard Kainz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.01567v1-abstract-short" style="display: inline;"> This work presents an extensive hyperparameter search on Image Diffusion Models for Echocardiogram generation. The objective is to establish foundational benchmarks and provide guidelines within the realm of ultrasound image and video generation. This study builds over the latest advancements, including cutting-edge model architectures and training methodologies. We also examine the distribution s&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.01567v1-abstract-full').style.display = 'inline'; document.getElementById('2311.01567v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.01567v1-abstract-full" style="display: none;"> This work presents an extensive hyperparameter search on Image Diffusion Models for Echocardiogram generation. The objective is to establish foundational benchmarks and provide guidelines within the realm of ultrasound image and video generation. This study builds over the latest advancements, including cutting-edge model architectures and training methodologies. We also examine the distribution shift between real and generated samples and consider potential solutions, crucial to train efficient models on generated data. We determine an Optimal FID score of $0.88$ for our research problem and achieve an FID of $2.60$. This work is aimed at contributing valuable insights and serving as a reference for further developments in the specialized field of ultrasound image and video generation. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.01567v1-abstract-full').style.display = 'none'; document.getElementById('2311.01567v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">MedNeurIPS 2023 poster</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.04187">arXiv:2310.04187</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.04187">pdf</a>, <a href="https://arxiv.org/format/2310.04187">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/978-3-031-44992-5_2">10.1007/978-3-031-44992-5_2 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Whole Slide Multiple Instance Learning for Predicting Axillary Lymph Node Metastasis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Shk%C3%ABmbi%2C+G">Glejdis Shk毛mbi</a>, <a href="/search/cs?searchtype=author&amp;query=M%C3%BCller%2C+J+P">Johanna P. M眉ller</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zhe Li</a>, <a href="/search/cs?searchtype=author&amp;query=Breininger%2C+K">Katharina Breininger</a>, <a href="/search/cs?searchtype=author&amp;query=Sch%C3%BCffler%2C+P">Peter Sch眉ffler</a>, <a href="/search/cs?searchtype=author&amp;query=Kainz%2C+B">Bernhard Kainz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.04187v1-abstract-short" style="display: inline;"> Breast cancer is a major concern for women&#39;s health globally, with axillary lymph node (ALN) metastasis identification being critical for prognosis evaluation and treatment guidance. This paper presents a deep learning (DL) classification pipeline for quantifying clinical information from digital core-needle biopsy (CNB) images, with one step less than existing methods. A publicly available datase&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.04187v1-abstract-full').style.display = 'inline'; document.getElementById('2310.04187v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.04187v1-abstract-full" style="display: none;"> Breast cancer is a major concern for women&#39;s health globally, with axillary lymph node (ALN) metastasis identification being critical for prognosis evaluation and treatment guidance. 
arXiv:2309.05090 [pdf, other] (cs.CV)
Sculpting Efficiency: Pruning Medical Imaging Models for On-Device Inference
Authors: Sudarshan Sreeram, Bernhard Kainz
Abstract: Leveraging ML advancements to augment healthcare systems can improve patient outcomes. Yet, uninformed engineering decisions in early-stage research inadvertently hinder the feasibility of such solutions for high-throughput, on-device inference, particularly in settings involving legacy hardware and multi-modal gigapixel images. Through a preliminary case study concerning segmentation in cardiology, we highlight the excess operational complexity in a suboptimally configured ML model from prior work and demonstrate that it can be sculpted away using pruning to meet deployment criteria. Our results show a compression rate of 1148x with minimal loss in quality (~4%) and, at higher rates, achieve faster inference on a CPU than the GPU baseline, stressing the need to consider task complexity and architectural details when using off-the-shelf models. With this, we consider avenues for future research in streamlining workflows for clinical researchers to develop models quicker and better suited for real-world use.
Submitted 1 November, 2023; v1 submitted 10 September, 2023; originally announced September 2023.
Comments: Accepted at MedNeurIPS 2023
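Magnitude pruning of the kind discussed is available off the shelf in PyTorch. A minimal sketch on a stand-in model follows; the paper's actual model and compression schedule are not reproduced here:

```python
import torch.nn as nn
import torch.nn.utils.prune as prune

# a stand-in segmentation-style model, purely for illustration
model = nn.Sequential(nn.Conv2d(1, 16, 3, padding=1), nn.ReLU(),
                      nn.Conv2d(16, 1, 3, padding=1))

# remove 90% of the smallest-magnitude weights in every conv layer
for module in model.modules():
    if isinstance(module, nn.Conv2d):
        prune.l1_unstructured(module, name="weight", amount=0.9)
        prune.remove(module, "weight")   # make the sparsity permanent
```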
arXiv:2308.09026 [pdf, ps, other] (eess.IV, cs.CV, cs.LG)
LesionMix: A Lesion-Level Data Augmentation Method for Medical Image Segmentation
Authors: Berke Doga Basaran, Weitong Zhang, Mengyun Qiao, Bernhard Kainz, Paul M. Matthews, Wenjia Bai
Abstract: Data augmentation has become a de facto component of deep learning-based medical image segmentation methods. Most data augmentation techniques used in medical imaging focus on spatial and intensity transformations to improve the diversity of training images. They are often designed at the image level, augmenting the full image, and do not pay attention to specific abnormalities within the image. Here, we present LesionMix, a novel and simple lesion-aware data augmentation method. It performs augmentation at the lesion level, increasing the diversity of lesion shape, location, intensity and load distribution, and allowing both lesion populating and inpainting. Experiments on different modalities and different lesion datasets, including four brain MR lesion datasets and one liver CT lesion dataset, demonstrate that LesionMix achieves promising performance in lesion image segmentation, outperforming several recent Mix-based data augmentation methods. The code will be released at https://github.com/dogabasaran/lesionmix.
Submitted 17 August, 2023; originally announced August 2023.
Comments: 13 pages, 5 figures, 4 tables, MICCAI DALI Workshop 2023
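Lesion-level "populating" can be illustrated by copying a masked lesion between images. The sketch below is a bare-bones version of the idea; LesionMix itself additionally varies shape, intensity and lesion load, so see the linked repository for the real implementation:

```python
import numpy as np

def paste_lesion(image, label, src_image, src_mask, center):
    """Copy a masked lesion from a source image into a target image at
    `center`, updating the segmentation label accordingly."""
    ys, xs = np.nonzero(src_mask)
    cy, cx = int(ys.mean()), int(xs.mean())      # lesion centroid in source
    out_img, out_lbl = image.copy(), label.copy()
    for y, x in zip(ys, xs):
        ty, tx = y - cy + center[0], x - cx + center[1]
        if 0 <= ty < image.shape[0] and 0 <= tx < image.shape[1]:
            out_img[ty, tx] = src_image[y, x]
            out_lbl[ty, tx] = 1
    return out_img, out_lbl
```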
arXiv:2307.00899 [pdf, other] (cs.CV)
Many tasks make light work: Learning to localise medical anomalies from multiple synthetic tasks
Authors: Matthew Baugh, Jeremy Tan, Johanna P. Müller, Mischa Dombrowski, James Batten, Bernhard Kainz
Abstract: There is a growing interest in single-class modelling and out-of-distribution detection, as fully supervised machine learning models cannot reliably identify classes not included in their training. The long tail of infinitely many out-of-distribution classes in real-world scenarios, e.g., for screening, triage, and quality control, means that it is often necessary to train single-class models that represent an expected feature distribution, e.g., from only strictly healthy volunteer data. Conventional supervised machine learning would require the collection of datasets that contain enough samples of all possible diseases in every imaging modality, which is not realistic. Self-supervised learning methods with synthetic anomalies are currently amongst the most promising approaches, alongside generative auto-encoders that analyse the residual reconstruction error. However, all methods suffer from a lack of structured validation, which makes calibration for deployment difficult and dataset-dependent. Our method alleviates this by making use of multiple visually-distinct synthetic anomaly learning tasks for both training and validation. This enables more robust training and generalisation. With our approach we can readily outperform state-of-the-art methods, which we demonstrate on exemplars in brain MRI and chest X-rays. Code is available at https://github.com/matt-baugh/many-tasks-make-light-work.
Submitted 3 July, 2023; originally announced July 2023.
Comments: Early accepted to MICCAI 2023
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Early accepted to MICCAI 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.09269">arXiv:2306.09269</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.09269">pdf</a>, <a href="https://arxiv.org/format/2306.09269">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Zero-Shot Anomaly Detection with Pre-trained Segmentation Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Baugh%2C+M">Matthew Baugh</a>, <a href="/search/cs?searchtype=author&amp;query=Batten%2C+J">James Batten</a>, <a href="/search/cs?searchtype=author&amp;query=M%C3%BCller%2C+J+P">Johanna P. M眉ller</a>, <a href="/search/cs?searchtype=author&amp;query=Kainz%2C+B">Bernhard Kainz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.09269v1-abstract-short" style="display: inline;"> This technical report outlines our submission to the zero-shot track of the Visual Anomaly and Novelty Detection (VAND) 2023 Challenge. Building on the performance of the WINCLIP framework, we aim to enhance the system&#39;s localization capabilities by integrating zero-shot segmentation models. In addition, we perform foreground instance segmentation which enables the model to focus on the relevant p&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.09269v1-abstract-full').style.display = 'inline'; document.getElementById('2306.09269v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.09269v1-abstract-full" style="display: none;"> This technical report outlines our submission to the zero-shot track of the Visual Anomaly and Novelty Detection (VAND) 2023 Challenge. Building on the performance of the WINCLIP framework, we aim to enhance the system&#39;s localization capabilities by integrating zero-shot segmentation models. In addition, we perform foreground instance segmentation which enables the model to focus on the relevant parts of the image, thus allowing the models to better identify small or subtle deviations. Our pipeline requires no external data or information, allowing for it to be directly applied to new datasets. Our team (Variance Vigilance Vanguard) ranked third in the zero-shot track of the VAND challenge, and achieve an average F1-max score of 81.5/24.2 at a sample/pixel level on the VisA dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.09269v1-abstract-full').style.display = 'none'; document.getElementById('2306.09269v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Ranked 3rd in zero-shot track of the Visual Anomaly and Novelty Detection (VAND) 2023 Challenge</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.01363">arXiv:2306.01363</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.01363">pdf</a>, <a href="https://arxiv.org/format/2306.01363">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Quantifying Sample Anonymity in Score-Based Generative Models with Adversarial Fingerprinting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dombrowski%2C+M">Mischa Dombrowski</a>, <a href="/search/cs?searchtype=author&amp;query=Kainz%2C+B">Bernhard Kainz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.01363v1-abstract-short" style="display: inline;"> Recent advances in score-based generative models have led to a huge spike in the development of downstream applications using generative models ranging from data augmentation over image and video generation to anomaly detection. Despite publicly available trained models, their potential to be used for privacy preserving data sharing has not been fully explored yet. Training diffusion models on pri&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.01363v1-abstract-full').style.display = 'inline'; document.getElementById('2306.01363v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.01363v1-abstract-full" style="display: none;"> Recent advances in score-based generative models have led to a huge spike in the development of downstream applications using generative models ranging from data augmentation over image and video generation to anomaly detection. Despite publicly available trained models, their potential to be used for privacy preserving data sharing has not been fully explored yet. Training diffusion models on private data and disseminating the models and weights rather than the raw dataset paves the way for innovative large-scale data-sharing strategies, particularly in healthcare, where safeguarding patients&#39; personal health information is paramount. However, publishing such models without individual consent of, e.g., the patients from whom the data was acquired, necessitates guarantees that identifiable training samples will never be reproduced, thus protecting personal health data and satisfying the requirements of policymakers and regulatory bodies. This paper introduces a method for estimating the upper bound of the probability of reproducing identifiable training images during the sampling process. This is achieved by designing an adversarial approach that searches for anatomic fingerprints, such as medical devices or dermal art, which could potentially be employed to re-identify training images. 
Our method harnesses the learned score-based model to estimate the probability of the entire subspace of the score function that may be utilized for one-to-one reproduction of training samples. To validate our estimates, we generate anomalies containing a fingerprint and investigate whether generated samples from trained generative models can be uniquely mapped to the original training samples. Overall, our results show that privacy-breaching images are reproduced at sampling time if the models were trained without care. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.09534">arXiv:2304.09534</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2304.09534">pdf</a>, <a href="https://arxiv.org/format/2304.09534">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Realistic Data Enrichment for Robust Image Segmentation in Histopathology </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cechnicka%2C+S">Sarah Cechnicka</a>, <a href="/search/cs?searchtype=author&amp;query=Ball%2C+J">James Ball</a>, <a href="/search/cs?searchtype=author&amp;query=Reynaud%2C+H">Hadrien Reynaud</a>, <a href="/search/cs?searchtype=author&amp;query=Arthurs%2C+C">Callum Arthurs</a>, <a href="/search/cs?searchtype=author&amp;query=Roufosse%2C+C">Candice Roufosse</a>, <a href="/search/cs?searchtype=author&amp;query=Kainz%2C+B">Bernhard Kainz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2304.09534v2-abstract-full" style="display: inline;"> Poor performance of quantitative analysis in histopathological Whole Slide Images (WSI) has been a significant obstacle in clinical practice.
Annotating large-scale WSIs manually is a demanding and time-consuming task, unlikely to yield the expected results when used for fully supervised learning systems. Rarely observed disease patterns and large differences in object scales are difficult to model through conventional patient intake. Prior methods either fall back to direct disease classification, which only requires learning a few factors per image, or report on average image segmentation performance, which is highly biased towards majority observations. Geometric image augmentation is commonly used to improve robustness for average case predictions and to enrich limited datasets. So far, no method has provided sampling of a realistic posterior distribution to improve stability, e.g., for the segmentation of imbalanced objects within images. Therefore, we propose a new approach, based on diffusion models, which can enrich an imbalanced dataset with plausible examples from underrepresented groups by conditioning on segmentation maps. Our method can simply expand limited clinical datasets, making them suitable for training machine learning pipelines, and provides an interpretable and human-controllable way of generating histopathology images that are indistinguishable from real ones to human experts. We validate our findings on two datasets, one from the public domain and one from a Kidney Transplant study. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 2 figures, 1 table</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.17908">arXiv:2303.17908</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2303.17908">pdf</a>, <a href="https://arxiv.org/format/2303.17908">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Trade-offs in Fine-tuned Diffusion Models Between Accuracy and Interpretability </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dombrowski%2C+M">Mischa Dombrowski</a>, <a href="/search/cs?searchtype=author&amp;query=Reynaud%2C+H">Hadrien Reynaud</a>, <a href="/search/cs?searchtype=author&amp;query=M%C3%BCller%2C+J+P">Johanna P.
Müller</a>, <a href="/search/cs?searchtype=author&amp;query=Baugh%2C+M">Matthew Baugh</a>, <a href="/search/cs?searchtype=author&amp;query=Kainz%2C+B">Bernhard Kainz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2303.17908v2-abstract-full" style="display: inline;"> Recent advancements in diffusion models have significantly impacted the trajectory of generative machine learning research, with many adopting the strategy of fine-tuning pre-trained models using domain-specific text-to-image datasets. Notably, this method has been readily employed for medical applications, such as X-ray image synthesis, leveraging the plethora of associated radiology reports. Yet, a prevailing concern is the lack of assurance on whether these models genuinely comprehend their generated content. With the evolution of text-conditional image generation, these models have grown potent enough to facilitate object localization scrutiny. Our research underscores this advancement in the critical realm of medical imaging, emphasizing the crucial role of interpretability. We further unravel a consequential trade-off between image fidelity as gauged by conventional metrics and model interpretability in generative diffusion models. Specifically, the adoption of learnable text encoders when fine-tuning results in diminished interpretability. Our in-depth exploration uncovers the underlying factors responsible for this divergence. Consequently, we present a set of design principles for the development of truly interpretable generative models. Code is available at https://github.com/MischaD/chest-distillation. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023.
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.13227">arXiv:2303.13227</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2303.13227">pdf</a>, <a href="https://arxiv.org/format/2303.13227">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Confidence-Aware and Self-Supervised Image Anomaly Localisation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=M%C3%BCller%2C+J+P">Johanna P. Müller</a>, <a href="/search/cs?searchtype=author&amp;query=Baugh%2C+M">Matthew Baugh</a>, <a href="/search/cs?searchtype=author&amp;query=Tan%2C+J">Jeremy Tan</a>, <a href="/search/cs?searchtype=author&amp;query=Dombrowski%2C+M">Mischa Dombrowski</a>, <a href="/search/cs?searchtype=author&amp;query=Kainz%2C+B">Bernhard Kainz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2303.13227v2-abstract-full" style="display: inline;"> Universal anomaly detection still remains a challenging problem in machine learning and medical image analysis. It is possible to learn an expected distribution from a single class of normative samples, e.g., through epistemic uncertainty estimates, auto-encoding models, or from synthetic anomalies in a self-supervised way. The performance of self-supervised anomaly detection approaches is still inferior compared to methods that use examples from known unknown classes to shape the decision boundary. However, outlier exposure methods often do not identify unknown unknowns. Here we discuss an improved self-supervised single-class training strategy that supports the approximation of probabilistic inference with loosened feature locality constraints. We show that up-scaling of gradients with histogram-equalised images is beneficial for recently proposed self-supervision tasks. Our method is integrated into several out-of-distribution (OOD) detection models and we show evidence that our method outperforms the state-of-the-art on various benchmark datasets.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.13227v2-abstract-full').style.display = 'none'; document.getElementById('2303.13227v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for MICCAI UNSURE Workshop 2023 (Spotlight)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.12644">arXiv:2303.12644</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2303.12644">pdf</a>, <a href="https://arxiv.org/format/2303.12644">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/978-3-031-43999-5_14">10.1007/978-3-031-43999-5_14 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Feature-Conditioned Cascaded Video Diffusion Models for Precise Echocardiogram Synthesis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Reynaud%2C+H">Hadrien Reynaud</a>, <a href="/search/cs?searchtype=author&amp;query=Qiao%2C+M">Mengyun Qiao</a>, <a href="/search/cs?searchtype=author&amp;query=Dombrowski%2C+M">Mischa Dombrowski</a>, <a href="/search/cs?searchtype=author&amp;query=Day%2C+T">Thomas Day</a>, <a href="/search/cs?searchtype=author&amp;query=Razavi%2C+R">Reza Razavi</a>, <a href="/search/cs?searchtype=author&amp;query=Gomez%2C+A">Alberto Gomez</a>, <a href="/search/cs?searchtype=author&amp;query=Leeson%2C+P">Paul Leeson</a>, <a href="/search/cs?searchtype=author&amp;query=Kainz%2C+B">Bernhard Kainz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.12644v3-abstract-short" style="display: inline;"> Image synthesis is expected to provide value for the translation of machine learning methods into clinical practice. Fundamental problems like model robustness, domain transfer, causal modelling, and operator training become approachable through synthetic data. Especially, heavily operator-dependant modalities like Ultrasound imaging require robust frameworks for image and video generation. So far&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.12644v3-abstract-full').style.display = 'inline'; document.getElementById('2303.12644v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.12644v3-abstract-full" style="display: none;"> Image synthesis is expected to provide value for the translation of machine learning methods into clinical practice. 
Fundamental problems like model robustness, domain transfer, causal modelling, and operator training become approachable through synthetic data. In particular, heavily operator-dependent modalities like Ultrasound imaging require robust frameworks for image and video generation. So far, video generation has only been possible by providing input data that is as rich as the output data, e.g., image sequence plus conditioning in, video out. However, clinical documentation is usually scarce and only single images are reported and stored, thus retrospective patient-specific analysis or the generation of rich training data becomes impossible with current approaches. In this paper, we extend elucidated diffusion models for video modelling to generate plausible video sequences from single images and arbitrary conditioning with clinical parameters. We explore this idea within the context of echocardiograms by looking into the variation of the Left Ventricle Ejection Fraction, the most essential clinical metric gained from these examinations. We use the publicly available EchoNet-Dynamic dataset for all our experiments. Our image-to-sequence approach achieves an $R^2$ score of 93%, which is 38 points higher than recently proposed sequence-to-sequence generation methods. Code and models will be available at: https://github.com/HReynaud/EchoDiffusion. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in MICCAI 2023 proceedings. https://link.springer.com/chapter/10.1007/978-3-031-43999-5_14</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.01790">arXiv:2302.01790</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2302.01790">pdf</a>, <a href="https://arxiv.org/format/2302.01790">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1038/s41592-023-02150-0">10.1038/s41592-023-02150-0 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Understanding metric-related pitfalls in image analysis validation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Reinke%2C+A">Annika Reinke</a>, <a href="/search/cs?searchtype=author&amp;query=Tizabi%2C+M+D">Minu D.
Tizabi</a>, <a href="/search/cs?searchtype=author&amp;query=Baumgartner%2C+M">Michael Baumgartner</a>, <a href="/search/cs?searchtype=author&amp;query=Eisenmann%2C+M">Matthias Eisenmann</a>, <a href="/search/cs?searchtype=author&amp;query=Heckmann-N%C3%B6tzel%2C+D">Doreen Heckmann-Nötzel</a>, <a href="/search/cs?searchtype=author&amp;query=Kavur%2C+A+E">A. Emre Kavur</a>, <a href="/search/cs?searchtype=author&amp;query=R%C3%A4dsch%2C+T">Tim Rädsch</a>, <a href="/search/cs?searchtype=author&amp;query=Sudre%2C+C+H">Carole H. Sudre</a>, <a href="/search/cs?searchtype=author&amp;query=Acion%2C+L">Laura Acion</a>, <a href="/search/cs?searchtype=author&amp;query=Antonelli%2C+M">Michela Antonelli</a>, <a href="/search/cs?searchtype=author&amp;query=Arbel%2C+T">Tal Arbel</a>, <a href="/search/cs?searchtype=author&amp;query=Bakas%2C+S">Spyridon Bakas</a>, <a href="/search/cs?searchtype=author&amp;query=Benis%2C+A">Arriel Benis</a>, <a href="/search/cs?searchtype=author&amp;query=Blaschko%2C+M">Matthew Blaschko</a>, <a href="/search/cs?searchtype=author&amp;query=Buettner%2C+F">Florian Buettner</a>, <a href="/search/cs?searchtype=author&amp;query=Cardoso%2C+M+J">M. Jorge Cardoso</a>, <a href="/search/cs?searchtype=author&amp;query=Cheplygina%2C+V">Veronika Cheplygina</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Jianxu Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Christodoulou%2C+E">Evangelia Christodoulou</a>, <a href="/search/cs?searchtype=author&amp;query=Cimini%2C+B+A">Beth A. Cimini</a>, <a href="/search/cs?searchtype=author&amp;query=Collins%2C+G+S">Gary S. Collins</a>, <a href="/search/cs?searchtype=author&amp;query=Farahani%2C+K">Keyvan Farahani</a>, <a href="/search/cs?searchtype=author&amp;query=Ferrer%2C+L">Luciana Ferrer</a>, <a href="/search/cs?searchtype=author&amp;query=Galdran%2C+A">Adrian Galdran</a>, <a href="/search/cs?searchtype=author&amp;query=van+Ginneken%2C+B">Bram van Ginneken</a>, et al. (53 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2302.01790v4-abstract-full" style="display: inline;"> Validation metrics are key for the reliable tracking of scientific progress and for bridging the current chasm between artificial intelligence (AI) research and its translation into practice. However, increasing evidence shows that particularly in image analysis, metrics are often chosen inadequately in relation to the underlying research problem.
This could be attributed to a lack of accessibility of metric-related knowledge: While taking into account the individual strengths, weaknesses, and limitations of validation metrics is a critical prerequisite to making educated choices, the relevant knowledge is currently scattered and poorly accessible to individual researchers. Based on a multi-stage Delphi process conducted by a multidisciplinary expert consortium as well as extensive community feedback, the present work provides the first reliable and comprehensive common point of access to information on pitfalls related to validation metrics in image analysis. Focusing on biomedical image analysis but with the potential of transfer to other fields, the addressed pitfalls generalize across application domains and are categorized according to a newly created, domain-agnostic taxonomy. To facilitate comprehension, illustrations and specific examples accompany each pitfall. As a structured body of information accessible to researchers of all levels of expertise, this work enhances global comprehension of a key topic in image analysis validation. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Shared first authors: Annika Reinke and Minu D. Tizabi; shared senior authors: Lena Maier-Hein and Paul F. Jäger. Published in Nature Methods. arXiv admin note: text overlap with arXiv:2206.01653</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Nature Methods, 1-13 (2024) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.14306">arXiv:2212.14306</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2212.14306">pdf</a>, <a href="https://arxiv.org/format/2212.14306">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Foreground-Background Separation through Concept Distillation from Generative Image Foundation Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dombrowski%2C+M">Mischa Dombrowski</a>, <a href="/search/cs?searchtype=author&amp;query=Reynaud%2C+H">Hadrien Reynaud</a>, <a href="/search/cs?searchtype=author&amp;query=Baugh%2C+M">Matthew Baugh</a>, <a href="/search/cs?searchtype=author&amp;query=Kainz%2C+B">Bernhard Kainz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
<span class="abstract-full has-text-grey-dark mathjax" id="2212.14306v2-abstract-full" style="display: inline;"> Curating datasets for object segmentation is a difficult task. With the advent of large-scale pre-trained generative models, conditional image generation has been given a significant boost in result quality and ease of use. In this paper, we present a novel method that enables the generation of general foreground-background segmentation models from simple textual descriptions, without requiring segmentation labels. We leverage and explore pre-trained latent diffusion models to automatically generate weak segmentation masks for concepts and objects. The masks are then used to fine-tune the diffusion model on an inpainting task, which enables fine-grained removal of the object, while at the same time providing a synthetic foreground and background dataset. We demonstrate that using this method beats previous methods in both discriminative and generative performance and closes the gap with fully supervised training while requiring no pixel-wise object labels. We show results on the task of segmenting four different objects (humans, dogs, cars, birds) and a use case scenario in medical image analysis. The code is available at https://github.com/MischaD/fobadiffusion. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at ICCV2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.04582">arXiv:2210.04582</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2210.04582">pdf</a>, <a href="https://arxiv.org/format/2210.04582">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1111/cgf.14834">10.1111/cgf.14834 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> ParaDime: A Framework for Parametric Dimensionality Reduction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Hinterreiter%2C+A">Andreas Hinterreiter</a>, <a href="/search/cs?searchtype=author&amp;query=Humer%2C+C">Christina Humer</a>, <a href="/search/cs?searchtype=author&amp;query=Kainz%2C+B">Bernhard Kainz</a>, <a href="/search/cs?searchtype=author&amp;query=Streit%2C+M">Marc Streit</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.04582v3-abstract-short" style="display: inline;"> ParaDime is a framework for parametric dimensionality reduction (DR). In parametric DR, neural networks are trained to embed high-dimensional data items in a low-dimensional space while minimizing an objective function. ParaDime builds on the idea that the objective functions of several modern DR techniques result from transformed inter-item relationships. It provides a common interface for specif&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.04582v3-abstract-full').style.display = 'inline'; document.getElementById('2210.04582v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.04582v3-abstract-full" style="display: none;"> ParaDime is a framework for parametric dimensionality reduction (DR). In parametric DR, neural networks are trained to embed high-dimensional data items in a low-dimensional space while minimizing an objective function. ParaDime builds on the idea that the objective functions of several modern DR techniques result from transformed inter-item relationships. It provides a common interface for specifying these relations and transformations and for defining how they are used within the losses that govern the training process. Through this interface, ParaDime unifies parametric versions of DR techniques such as metric MDS, t-SNE, and UMAP. It allows users to fully customize all aspects of the DR process. We show how this ease of customization makes ParaDime suitable for experimenting with interesting techniques such as hybrid classification/embedding models and supervised DR. This way, ParaDime opens up new possibilities for visualizing high-dimensional data. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.04582v3-abstract-full').style.display = 'none'; document.getElementById('2210.04582v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted at EuroVis 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.04514">arXiv:2210.04514</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2210.04514">pdf</a>, <a href="https://arxiv.org/format/2210.04514">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> </div> <p class="title is-5 mathjax"> Self-Supervised 3D Human Pose Estimation in Static Video Via Neural Rendering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Schmidtke%2C+L">Luca Schmidtke</a>, <a href="/search/cs?searchtype=author&amp;query=Hou%2C+B">Benjamin Hou</a>, <a href="/search/cs?searchtype=author&amp;query=Vlontzos%2C+A">Athanasios Vlontzos</a>, <a href="/search/cs?searchtype=author&amp;query=Kainz%2C+B">Bernhard Kainz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.04514v1-abstract-short" style="display: inline;"> Inferring 3D human pose from 2D images is a challenging and long-standing problem in the field of computer vision with many applications including motion capture, virtual reality, surveillance or gait analysis for sports and medicine. We present preliminary results for a method to estimate 3D pose from 2D video containing a single person and a static background without the need for any manual land&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.04514v1-abstract-full').style.display = 'inline'; document.getElementById('2210.04514v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.04514v1-abstract-full" style="display: none;"> Inferring 3D human pose from 2D images is a challenging and long-standing problem in the field of computer vision with many applications including motion capture, virtual reality, surveillance or gait analysis for sports and medicine. We present preliminary results for a method to estimate 3D pose from 2D video containing a single person and a static background without the need for any manual landmark annotations. 
We achieve this by formulating a simple yet effective self-supervision task: our model is required to reconstruct a random frame of a video given a frame from another timepoint and a rendered image of a transformed human shape template. Crucially for optimisation, our ray-casting-based rendering pipeline is fully differentiable, enabling end-to-end training solely based on the reconstruction task. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">CV4Metaverse Workshop @ ECCV 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.12305">arXiv:2209.12305</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2209.12305">pdf</a>, <a href="https://arxiv.org/format/2209.12305">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Adnexal Mass Segmentation with Ultrasound Data Synthesis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lebbos%2C+C">Clara Lebbos</a>, <a href="/search/cs?searchtype=author&amp;query=Barcroft%2C+J">Jen Barcroft</a>, <a href="/search/cs?searchtype=author&amp;query=Tan%2C+J">Jeremy Tan</a>, <a href="/search/cs?searchtype=author&amp;query=Muller%2C+J+P">Johanna P. Muller</a>, <a href="/search/cs?searchtype=author&amp;query=Baugh%2C+M">Matthew Baugh</a>, <a href="/search/cs?searchtype=author&amp;query=Vlontzos%2C+A">Athanasios Vlontzos</a>, <a href="/search/cs?searchtype=author&amp;query=Saso%2C+S">Srdjan Saso</a>, <a href="/search/cs?searchtype=author&amp;query=Kainz%2C+B">Bernhard Kainz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
<span class="abstract-full has-text-grey-dark mathjax" id="2209.12305v1-abstract-full" style="display: inline;"> Ovarian cancer is the most lethal gynaecological malignancy. The disease is most commonly asymptomatic at its early stages and its diagnosis relies on expert evaluation of transvaginal ultrasound images. Ultrasound is the first-line imaging modality for characterising adnexal masses; it requires significant expertise, and its analysis is subjective and labour-intensive, and therefore open to error. Hence, automating processes to facilitate and standardise the evaluation of scans is desired in clinical practice. Using supervised learning, we have demonstrated that segmentation of adnexal masses is possible; however, prevalence and label imbalance restrict the performance on under-represented classes. To mitigate this, we apply a novel pathology-specific data synthesiser. We create synthetic medical images with their corresponding ground truth segmentations by using Poisson image editing to integrate less common masses into other samples. Our approach achieves the best performance across all classes, including an improvement of up to 8% when compared with nnU-Net baseline approaches. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> ASMUS 2022, LNCS 13565, p. 106, 2022 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.01124">arXiv:2209.01124</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2209.01124">pdf</a>, <a href="https://arxiv.org/format/2209.01124">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> nnOOD: A Framework for Benchmarking Self-supervised Anomaly Localisation Methods </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Baugh%2C+M">Matthew Baugh</a>, <a href="/search/cs?searchtype=author&amp;query=Tan%2C+J">Jeremy Tan</a>, <a href="/search/cs?searchtype=author&amp;query=Vlontzos%2C+A">Athanasios Vlontzos</a>, <a href="/search/cs?searchtype=author&amp;query=M%C3%BCller%2C+J+P">Johanna P. Müller</a>, <a href="/search/cs?searchtype=author&amp;query=Kainz%2C+B">Bernhard Kainz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
<span class="abstract-full has-text-grey-dark mathjax" id="2209.01124v1-abstract-full" style="display: inline;"> The wide variety of in-distribution and out-of-distribution data in medical imaging makes universal anomaly detection a challenging task. Recently, a number of self-supervised methods have been developed that train end-to-end models on healthy data augmented with synthetic anomalies. However, it is difficult to compare these methods as it is not clear whether gains in performance are from the task itself or the training pipeline around it. It is also difficult to assess whether a task generalises well for universal anomaly detection, as they are often only tested on a limited range of anomalies. To assist with this, we have developed nnOOD, a framework that adapts nnU-Net to allow for comparison of self-supervised anomaly localisation methods. By isolating the synthetic, self-supervised task from the rest of the training process, we perform a more faithful comparison of the tasks, whilst also making the workflow for evaluating over a given dataset quick and easy. Using this, we have implemented the current state-of-the-art tasks and evaluated them on a challenging X-ray dataset. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted as Spotlight to UNSURE 2022, a workshop at MICCAI 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.02870">arXiv:2208.02870</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2208.02870">pdf</a>, <a href="https://arxiv.org/format/2208.02870">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Improved post-hoc probability calibration for out-of-domain MRI segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ouyang%2C+C">Cheng Ouyang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Shuo Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+C">Chen Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zeju Li</a>, <a href="/search/cs?searchtype=author&amp;query=Bai%2C+W">Wenjia Bai</a>, <a href="/search/cs?searchtype=author&amp;query=Kainz%2C+B">Bernhard Kainz</a>, <a href="/search/cs?searchtype=author&amp;query=Rueckert%2C+D">Daniel Rueckert</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.02870v2-abstract-short" style="display: inline;"> Probability calibration for deep models is highly desirable in safety-critical applications such as medical imaging. It makes output probabilities of deep networks interpretable, by aligning prediction probability with the actual accuracy in test data. In image segmentation, well-calibrated probabilities allow radiologists to identify regions where model-predicted segmentations are unreliable. The&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.02870v2-abstract-full').style.display = 'inline'; document.getElementById('2208.02870v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.02870v2-abstract-full" style="display: none;"> Probability calibration for deep models is highly desirable in safety-critical applications such as medical imaging. It makes output probabilities of deep networks interpretable, by aligning prediction probability with the actual accuracy in test data. In image segmentation, well-calibrated probabilities allow radiologists to identify regions where model-predicted segmentations are unreliable. These unreliable predictions often occur to out-of-domain (OOD) images that are caused by imaging artifacts or unseen imaging protocols. Unfortunately, most previous calibration methods for image segmentation perform sub-optimally on OOD images. To reduce the calibration error when confronted with OOD images, we propose a novel post-hoc calibration model. Our model leverages the pixel susceptibility against perturbations at the local level, and the shape prior information at the global level. The model is tested on cardiac MRI segmentation datasets that contain unseen imaging artifacts and images from an unseen imaging protocol. We demonstrate reduced calibration errors compared with the state-of-the-art calibration algorithm. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.02870v2-abstract-full').style.display = 'none'; document.getElementById('2208.02870v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for UNSURE workshop at MICCAI 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.14746">arXiv:2206.14746</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2206.14746">pdf</a>, <a href="https://arxiv.org/format/2206.14746">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Placenta Segmentation in Ultrasound Imaging: Addressing Sources of Uncertainty and Limited Field-of-View </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zimmer%2C+V+A">Veronika A. Zimmer</a>, <a href="/search/cs?searchtype=author&amp;query=Gomez%2C+A">Alberto Gomez</a>, <a href="/search/cs?searchtype=author&amp;query=Skelton%2C+E">Emily Skelton</a>, <a href="/search/cs?searchtype=author&amp;query=Wright%2C+R">Robert Wright</a>, <a href="/search/cs?searchtype=author&amp;query=Wheeler%2C+G">Gavin Wheeler</a>, <a href="/search/cs?searchtype=author&amp;query=Deng%2C+S">Shujie Deng</a>, <a href="/search/cs?searchtype=author&amp;query=Ghavami%2C+N">Nooshin Ghavami</a>, <a href="/search/cs?searchtype=author&amp;query=Lloyd%2C+K">Karen Lloyd</a>, <a href="/search/cs?searchtype=author&amp;query=Matthew%2C+J">Jacqueline Matthew</a>, <a href="/search/cs?searchtype=author&amp;query=Kainz%2C+B">Bernhard Kainz</a>, <a href="/search/cs?searchtype=author&amp;query=Rueckert%2C+D">Daniel Rueckert</a>, <a href="/search/cs?searchtype=author&amp;query=Hajnal%2C+J+V">Joseph V. Hajnal</a>, <a href="/search/cs?searchtype=author&amp;query=Schnabel%2C+J+A">Julia A. Schnabel</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.14746v1-abstract-short" style="display: inline;"> Automatic segmentation of the placenta in fetal ultrasound (US) is challenging due to the (i) high diversity of placenta appearance, (ii) the restricted quality in US resulting in highly variable reference annotations, and (iii) the limited field-of-view of US prohibiting whole placenta assessment at late gestation. 
<span class="abstract-full has-text-grey-dark mathjax" id="2206.14746v1-abstract-full" style="display: inline;"> Automatic segmentation of the placenta in fetal ultrasound (US) is challenging due to the (i) high diversity of placenta appearance, (ii) the restricted quality in US resulting in highly variable reference annotations, and (iii) the limited field-of-view of US prohibiting whole placenta assessment at late gestation. In this work, we address these three challenges with a multi-task learning approach that combines the classification of placental location (e.g., anterior, posterior) and semantic placenta segmentation in a single convolutional neural network. Through the classification task, the model can learn from larger and more diverse datasets while improving the accuracy of the segmentation task, in particular in limited training set conditions. With this approach, we investigate the variability in annotations from multiple raters and show that our automatic segmentations (Dice of 0.86 for anterior and 0.83 for posterior placentas) achieve human-level performance as compared to intra- and inter-observer variability. Lastly, our approach can deliver whole placenta segmentation using a multi-view US acquisition pipeline consisting of three stages: multi-probe image acquisition, image fusion and image segmentation. This results in high-quality segmentation of larger structures such as the placenta in US with reduced image artifacts which are beyond the field-of-view of single probes. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">21 pages (18 + appendix), 13 figures (9 + appendix)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.05498">arXiv:2206.05498</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2206.05498">pdf</a>, <a href="https://arxiv.org/format/2206.05498">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="General Literature">cs.GL</span> </div> </div> <p class="title is-5 mathjax"> A Review of Causality for Learning Algorithms in Medical Image Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Vlontzos%2C+A">Athanasios Vlontzos</a>, <a href="/search/cs?searchtype=author&amp;query=Rueckert%2C+D">Daniel Rueckert</a>, <a href="/search/cs?searchtype=author&amp;query=Kainz%2C+B">Bernhard Kainz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.05498v2-abstract-short" style="display: inline;"> Medical image analysis is a vibrant research area that offers doctors and medical practitioners invaluable insight and the ability to accurately diagnose and monitor disease. Machine learning provides an additional boost for this area. However, machine learning for medical image analysis is particularly vulnerable to natural biases like domain shifts that affect algorithmic performance and robustn&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.05498v2-abstract-full').style.display = 'inline'; document.getElementById('2206.05498v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.05498v2-abstract-full" style="display: none;"> Medical image analysis is a vibrant research area that offers doctors and medical practitioners invaluable insight and the ability to accurately diagnose and monitor disease. Machine learning provides an additional boost for this area. However, machine learning for medical image analysis is particularly vulnerable to natural biases like domain shifts that affect algorithmic performance and robustness. In this paper we analyze machine learning for medical image analysis within the framework of Technology Readiness Levels and review how causal analysis methods can fill a gap when creating robust and adaptable medical image analysis algorithms. We review methods using causality in medical imaging AI/ML and find that causal analysis has the potential to mitigate critical problems for clinical translation but that uptake and clinical downstream research has been limited so far. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.05498v2-abstract-full').style.display = 'none'; document.getElementById('2206.05498v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for publication at the Journal of Machine Learning for Biomedical Imaging (MELBA) https://www.melba-journal.org/papers/2022:028.html&#34;. ; Paper ID: 2022:028</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Machine.Learning.for.Biomedical.Imaging. 1 (2022) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.02409">arXiv:2206.02409</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2206.02409">pdf</a>, <a href="https://arxiv.org/format/2206.02409">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Is More Data All You Need? A Causal Exploration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Vlontzos%2C+A">Athanasios Vlontzos</a>, <a href="/search/cs?searchtype=author&amp;query=Reynaud%2C+H">Hadrien Reynaud</a>, <a href="/search/cs?searchtype=author&amp;query=Kainz%2C+B">Bernhard Kainz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.02409v1-abstract-short" style="display: inline;"> Curating a large scale medical imaging dataset for machine learning applications is both time consuming and expensive. Balancing the workload between model development, data collection and annotations is difficult for machine learning practitioners, especially under time constraints. Causal analysis is often used in medicine and economics to gain insights about the effects of actions and policies.&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.02409v1-abstract-full').style.display = 'inline'; document.getElementById('2206.02409v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.02409v1-abstract-full" style="display: none;"> Curating a large scale medical imaging dataset for machine learning applications is both time consuming and expensive. Balancing the workload between model development, data collection and annotations is difficult for machine learning practitioners, especially under time constraints. 
arXiv:2206.01653 [pdf, other] cs.CV
DOI: 10.1038/s41592-023-02151-z
Metrics reloaded: Recommendations for image analysis validation
Authors: Lena Maier-Hein, Annika Reinke, Patrick Godau, Minu D. Tizabi, Florian Buettner, Evangelia Christodoulou, Ben Glocker, Fabian Isensee, Jens Kleesiek, Michal Kozubek, Mauricio Reyes, Michael A. Riegler, Manuel Wiesenfarth, A. Emre Kavur, Carole H. Sudre, Michael Baumgartner, Matthias Eisenmann, Doreen Heckmann-Nötzel, Tim Rädsch, Laura Acion, Michela Antonelli, Tal Arbel, Spyridon Bakas, Arriel Benis, Matthew Blaschko, et al. (49 additional authors not shown)
Abstract: Increasing evidence shows that flaws in machine learning (ML) algorithm validation are an underestimated global problem. Particularly in automatic biomedical image analysis, chosen performance metrics often do not reflect the domain interest, thus failing to adequately measure scientific progress and hindering translation of ML techniques into practice. To overcome this, our large international expert consortium created Metrics Reloaded, a comprehensive framework guiding researchers in the problem-aware selection of metrics. Following the convergence of ML methodology across application domains, Metrics Reloaded fosters the convergence of validation methodology. The framework was developed in a multi-stage Delphi process and is based on the novel concept of a problem fingerprint: a structured representation of the given problem that captures all aspects relevant for metric selection, from the domain interest to the properties of the target structure(s), data set and algorithm output. Based on the problem fingerprint, users are guided through the process of choosing and applying appropriate validation metrics while being made aware of potential pitfalls. Metrics Reloaded targets image analysis problems that can be interpreted as a classification task at image, object or pixel level, namely image-level classification, object detection, semantic segmentation, and instance segmentation. To improve the user experience, we implemented the framework in the Metrics Reloaded online tool, which also provides a point of access to explore weaknesses, strengths and specific recommendations for the most common validation metrics. The broad applicability of our framework across domains is demonstrated by an instantiation for various biological and medical image analysis use cases.
Submitted 23 February, 2024; v1 submitted 3 June, 2022; originally announced June 2022.
Comments: Shared first authors: Lena Maier-Hein, Annika Reinke. Published in Nature Methods. arXiv admin note: substantial text overlap with arXiv:2104.05642
Journal ref: Nature Methods, 1-18 (2024)
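The fingerprint-driven selection can be pictured as a mapping from structured task properties to metric recommendations. The toy rules below are invented purely for illustration; the actual framework and its online tool encode far more fingerprint dimensions and pitfalls.

# Toy illustration of the "problem fingerprint" idea: a structured task
# description drives metric selection. Fields and rules are invented here;
# consult the Metrics Reloaded paper/tool for the real decision logic.
from dataclasses import dataclass

@dataclass
class ProblemFingerprint:
    task: str                  # "semantic_segmentation", "image_classification", ...
    small_structures: bool     # are targets tiny relative to the image?
    class_imbalance: bool

def recommend_metrics(fp: ProblemFingerprint) -> list:
    if fp.task == "semantic_segmentation":
        metrics = ["Dice"]
        if fp.small_structures:
            metrics.append("boundary distance (e.g. NSD / HD95)")
        return metrics
    if fp.task == "image_classification":
        return ["balanced accuracy"] if fp.class_imbalance else ["accuracy"]
    return ["consult the Metrics Reloaded online tool"]

print(recommend_metrics(ProblemFingerprint("semantic_segmentation", True, False)))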
arXiv:2206.01651 [pdf, other] cs.CV
D'ARTAGNAN: Counterfactual Video Generation
Authors: Hadrien Reynaud, Athanasios Vlontzos, Mischa Dombrowski, Ciarán Lee, Arian Beqiri, Paul Leeson, Bernhard Kainz
Abstract: Causally-enabled machine learning frameworks could help clinicians identify the best course of treatment by answering counterfactual questions. We explore this path for the case of echocardiograms by looking into the variation of the Left Ventricle Ejection Fraction, the most essential clinical metric obtained from these examinations. We combine deep neural networks, twin causal networks and generative adversarial methods for the first time to build D'ARTAGNAN (Deep ARtificial Twin-Architecture GeNerAtive Networks), a novel causal generative model. We demonstrate the soundness of our approach on a synthetic dataset before applying it to cardiac ultrasound videos to answer the question: "What would this echocardiogram look like if the patient had a different ejection fraction?" To do so, we generate new ultrasound videos, retaining the video style and anatomy of the original patient while modifying the Ejection Fraction conditioned on a given input. We achieve an SSIM score of 0.79 and an R2 score of 0.51 on the counterfactual videos. Code and models are available at https://github.com/HReynaud/dartagnan.
Submitted 30 June, 2022; v1 submitted 3 June, 2022; originally announced June 2022.
Comments: Accepted for MICCAI 2022
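The counterfactual query itself has a simple computational shape: hold the patient-specific "style" fixed and re-run the generator under a different ejection fraction. The stub below shows only that interface; the MLP, the CondVideoGenerator name, and all sizes are placeholders for this sketch, not the published architecture (see the linked repository for the real one).

# Schematic counterfactual query on echo video, in the spirit of
# D'ARTAGNAN: fix the patient's anatomy/style embedding, vary the
# ejection fraction (EF). Placeholder network, not the paper's model.
import torch
import torch.nn as nn

class CondVideoGenerator(nn.Module):
    def __init__(self, style_dim=64, frames=16):
        super().__init__()
        self.frames = frames
        self.net = nn.Sequential(
            nn.Linear(style_dim + 1, 256), nn.ReLU(),
            nn.Linear(256, frames * 28 * 28),
        )

    def forward(self, style, ef):
        # style: (B, style_dim) anatomy embedding; ef: (B, 1) ejection fraction
        v = self.net(torch.cat([style, ef], dim=1))
        return v.view(-1, self.frames, 28, 28)

g = CondVideoGenerator()
style = torch.randn(1, 64)                         # factual patient embedding
factual = g(style, torch.tensor([[0.60]]))
counterfactual = g(style, torch.tensor([[0.35]]))  # "what if EF were 35%?"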
arXiv:2205.08239 [pdf, other] eess.IV cs.CV
CAS-Net: Conditional Atlas Generation and Brain Segmentation for Fetal MRI
Authors: Liu Li, Qiang Ma, Matthew Sinclair, Antonios Makropoulos, Joseph Hajnal, A. David Edwards, Bernhard Kainz, Daniel Rueckert, Amir Alansary
Abstract: Fetal Magnetic Resonance Imaging (MRI) is used in prenatal diagnosis and to assess early brain development. Accurate segmentation of the different brain tissues is a vital step in several brain analysis tasks, such as cortical surface reconstruction and tissue thickness measurements. Fetal MRI scans, however, are prone to motion artifacts that can affect the correctness of both manual and automatic segmentation techniques. In this paper, we propose CAS-Net, a novel network structure that can simultaneously generate conditional atlases and predict brain tissue segmentation. The conditional atlases provide anatomical priors that can constrain the segmentation connectivity, despite the heterogeneity of intensity values caused by motion or partial volume effects. The proposed method is trained and evaluated on 253 subjects from the developing Human Connectome Project (dHCP). The results demonstrate that the proposed method can generate conditional age-specific atlases with sharp boundaries that capture shape variance. It also segments multi-category brain tissues for fetal MRI with a high overall Dice similarity coefficient (DSC) of 85.2% for the selected 9 tissue labels.
Submitted 17 May, 2022; originally announced May 2022.
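The key mechanism, an age-conditioned atlas acting as a spatial prior on the segmentation, can be illustrated with a tiny model. Everything below (2D instead of 3D, the fusion-by-addition of logits, all sizes) is an assumption of this sketch and not the CAS-Net architecture.

# Sketch of the conditional-atlas idea: an age-conditioned atlas supplies
# a spatial prior that is combined with image-based logits before softmax.
import torch
import torch.nn as nn

class AtlasConditionedSeg(nn.Module):
    def __init__(self, n_tissues=9, size=32):
        super().__init__()
        self.atlas_head = nn.Linear(1, n_tissues * size * size)  # age -> atlas logits
        self.seg = nn.Conv2d(1, n_tissues, 3, padding=1)         # image -> logits
        self.n, self.s = n_tissues, size

    def forward(self, image, gest_age):
        atlas = self.atlas_head(gest_age).view(-1, self.n, self.s, self.s)
        return self.seg(image) + atlas   # atlas acts as an anatomical prior

model = AtlasConditionedSeg()
probs = model(torch.randn(2, 1, 32, 32),
              torch.tensor([[25.0], [35.0]])).softmax(dim=1)

Because the atlas term depends only on gestational age, it nudges the prediction toward age-appropriate anatomy even where image intensities are corrupted by motion or partial volume effects.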
arXiv:2202.08329 [pdf, other] eess.IV cs.CV
CortexODE: Learning Cortical Surface Reconstruction by Neural ODEs
Authors: Qiang Ma, Liu Li, Emma C. Robinson, Bernhard Kainz, Daniel Rueckert, Amir Alansary
Abstract: We present CortexODE, a deep learning framework for cortical surface reconstruction. CortexODE leverages neural ordinary differential equations (ODEs) to deform an input surface into a target shape by learning a diffeomorphic flow. The trajectories of the points on the surface are modeled as ODEs, where the derivatives of their coordinates are parameterized via a learnable Lipschitz-continuous deformation network. This provides theoretical guarantees for the prevention of self-intersections. CortexODE can be integrated into an automatic learning-based pipeline, which reconstructs cortical surfaces efficiently in less than 5 seconds. The pipeline utilizes a 3D U-Net to predict a white matter segmentation from brain Magnetic Resonance Imaging (MRI) scans, and further generates a signed distance function that represents an initial surface. Fast topology correction is introduced to guarantee homeomorphism to a sphere. Following the isosurface extraction step, two CortexODE models are trained to deform the initial surface to the white matter and pial surfaces, respectively. The proposed pipeline is evaluated on large-scale neuroimaging datasets covering various age groups, including neonates (25-45 weeks), young adults (22-36 years) and elderly subjects (55-90 years). Our experiments demonstrate that the CortexODE-based pipeline achieves an average geometric error below 0.2 mm while being orders of magnitude faster than conventional processing pipelines.
Submitted 10 September, 2022; v1 submitted 16 February, 2022; originally announced February 2022.
Comments: Accepted by IEEE Transactions on Medical Imaging
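The core idea, vertex trajectories governed by a learned velocity field dx/dt = f(x), fits in a few lines. The sketch below uses fixed-step Euler integration and a tiny tanh MLP (which, having bounded weights, is Lipschitz continuous, echoing the paper's requirement); the published pipeline uses a learned 3D deformation network and proper ODE solvers, so treat this purely as an illustration.

# Minimal neural-ODE surface deformation: integrate dx/dt = f(x) for every
# vertex. Fixed-step Euler here; real pipelines use adaptive ODE solvers.
import torch
import torch.nn as nn

class VelocityField(nn.Module):
    def __init__(self):
        super().__init__()
        self.f = nn.Sequential(nn.Linear(3, 64), nn.Tanh(), nn.Linear(64, 3))

    def forward(self, x):          # x: (N, 3) vertex coordinates
        return self.f(x)

def integrate(x, field, steps=20, t1=1.0):
    h = t1 / steps
    for _ in range(steps):         # forward Euler: x <- x + h * f(x)
        x = x + h * field(x)
    return x

field = VelocityField()
init_surface = torch.randn(1000, 3)        # e.g. vertices of an initial mesh
deformed = integrate(init_surface, field)  # vertices of the target surface

Because the flow is (approximately) diffeomorphic, two trajectories cannot cross, which is what rules out self-intersecting surfaces.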
arXiv:2111.03485 [pdf, other] eess.IV cs.CV
Cross Modality 3D Navigation Using Reinforcement Learning and Neural Style Transfer
Authors: Cesare Magnetti, Hadrien Reynaud, Bernhard Kainz
Abstract: This paper presents the use of Multi-Agent Reinforcement Learning (MARL) to perform navigation in 3D anatomical volumes from medical imaging. We utilize Neural Style Transfer to create synthetic Computed Tomography (CT) agent gym environments and assess the generalization capabilities of our agents to clinical CT volumes. Our framework does not require any labelled clinical data and integrates easily with several image translation techniques, enabling cross-modality applications. Further, we solely condition our agents on 2D slices, breaking ground for 3D guidance in much more difficult imaging modalities, such as ultrasound imaging. This is an important step towards user guidance during the acquisition of standardised diagnostic view planes, improving diagnostic consistency and facilitating better case comparison.
Submitted 5 November, 2021; originally announced November 2021.
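"Conditioning on 2D slices" means the agent never sees the full volume, only the current slice, and acts by moving the slice plane. The toy environment below conveys that interface; its state, action set, and reward are simplifications invented for this sketch, not the paper's MARL formulation.

# Toy slice-based navigation: observe a 2D slice of a 3D volume, move the
# slice plane until a target plane is reached. Invented reward/actions.
import numpy as np

class SliceNavEnv:
    def __init__(self, volume, target_z):
        self.volume, self.target_z, self.z = volume, target_z, 0

    def step(self, action):                 # action: -1 or +1 (move plane)
        self.z = int(np.clip(self.z + action, 0, self.volume.shape[0] - 1))
        reward = -abs(self.z - self.target_z)
        done = self.z == self.target_z
        return self.volume[self.z], reward, done

env = SliceNavEnv(np.random.rand(32, 64, 64), target_z=20)
obs, done = env.volume[0], False
while not done:                              # trivial stand-in "policy"
    obs, reward, done = env.step(+1)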
arXiv:2110.13289 [pdf, other] cs.CV
Uncertainty quantification in non-rigid image registration via stochastic gradient Markov chain Monte Carlo
Authors: Daniel Grzech, Mohammad Farid Azampour, Huaqi Qiu, Ben Glocker, Bernhard Kainz, Loïc Le Folgoc
Abstract: We develop a new Bayesian model for non-rigid registration of three-dimensional medical images, with a focus on uncertainty quantification. Probabilistic registration of large images with calibrated uncertainty estimates is difficult for both computational and modelling reasons. To address the computational issues, we explore connections between the Markov chain Monte Carlo by backpropagation and the variational inference by backpropagation frameworks in order to efficiently draw samples from the posterior distribution of transformation parameters. To address the modelling issues, we formulate a Bayesian model for image registration that overcomes the existing barriers when using a dense, high-dimensional, and diffeomorphic transformation parametrisation. This results in improved calibration of uncertainty estimates. We compare the model in terms of both image registration accuracy and uncertainty quantification to VoxelMorph, a state-of-the-art image registration model based on deep learning.
Submitted 25 October, 2021; originally announced October 2021.
Comments: MELBA Special Issue: Uncertainty for Safe Utilization of Machine Learning in Medical Imaging (UNSURE) 2020
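The mechanics of stochastic gradient MCMC are easy to show on a toy problem: each update is a gradient step on the registration loss plus injected Gaussian noise, so the iterates become (approximate) posterior samples whose spread quantifies uncertainty. The 1D-translation example below is a deliberately tiny stand-in for the paper's dense diffeomorphic parametrisation.

# Stochastic gradient Langevin dynamics (SGLD) over a single translation
# parameter: gradient step + sqrt(2*lr) Gaussian noise per iteration.
import torch

fixed = torch.linspace(-3, 3, 200)
moving = fixed + 1.5                       # ground-truth shift is -1.5

def loss(theta):                           # negative log-posterior (up to const.)
    return ((moving + theta - fixed) ** 2).mean() + 0.01 * theta ** 2

theta, lr, samples = torch.zeros(1, requires_grad=True), 1e-2, []
for step in range(2000):
    loss(theta).backward()
    with torch.no_grad():
        noise = torch.randn_like(theta) * (2 * lr) ** 0.5
        theta -= lr * theta.grad - noise   # SGLD update
        theta.grad.zero_()
    if step > 1000:
        samples.append(theta.item())       # keep post-burn-in samples

print(f"posterior mean {sum(samples) / len(samples):.2f}")  # approx. -1.50

The sample standard deviation, not shown, is the uncertainty estimate; in the paper this is computed per voxel over full 3D transformations.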
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.13289v1-abstract-full').style.display = 'none'; document.getElementById('2110.13289v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">MELBA Special Issue: Uncertainty for Safe Utilization of Machine Learning in Medical Imaging (UNSURE) 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.15222">arXiv:2109.15222</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2109.15222">pdf</a>, <a href="https://arxiv.org/format/2109.15222">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Natural Synthetic Anomalies for Self-Supervised Anomaly Detection and Localization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Schl%C3%BCter%2C+H+M">Hannah M. Schl眉ter</a>, <a href="/search/cs?searchtype=author&amp;query=Tan%2C+J">Jeremy Tan</a>, <a href="/search/cs?searchtype=author&amp;query=Hou%2C+B">Benjamin Hou</a>, <a href="/search/cs?searchtype=author&amp;query=Kainz%2C+B">Bernhard Kainz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.15222v3-abstract-short" style="display: inline;"> We introduce a simple and intuitive self-supervision task, Natural Synthetic Anomalies (NSA), for training an end-to-end model for anomaly detection and localization using only normal training data. NSA integrates Poisson image editing to seamlessly blend scaled patches of various sizes from separate images. This creates a wide range of synthetic anomalies which are more similar to natural sub-ima&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.15222v3-abstract-full').style.display = 'inline'; document.getElementById('2109.15222v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.15222v3-abstract-full" style="display: none;"> We introduce a simple and intuitive self-supervision task, Natural Synthetic Anomalies (NSA), for training an end-to-end model for anomaly detection and localization using only normal training data. NSA integrates Poisson image editing to seamlessly blend scaled patches of various sizes from separate images. This creates a wide range of synthetic anomalies which are more similar to natural sub-image irregularities than previous data-augmentation strategies for self-supervised anomaly detection. We evaluate the proposed method using natural and medical images. Our experiments with the MVTec AD dataset show that a model trained to localize NSA anomalies generalizes well to detecting real-world a priori unknown types of manufacturing defects. 
arXiv:2109.06519 [pdf, other] physics.med-ph cs.SE
PRETUS: A plug-in based platform for real-time ultrasound imaging research
Authors: Alberto Gomez, Veronika A. Zimmer, Gavin Wheeler, Nicolas Toussaint, Shujie Deng, Robert Wright, Emily Skelton, Jackie Matthew, Bernhard Kainz, Jo Hajnal, Julia Schnabel
Abstract: We present PRETUS, a Plugin-based Real Time UltraSound software platform for live ultrasound image analysis and operator support. The software is lightweight; functionality is brought in via independent plug-ins that can be arranged in sequence. The software can capture the real-time stream of ultrasound images from virtually any ultrasound machine, apply computational methods, and visualise the results on the fly. Plug-ins can run concurrently without blocking each other and can be implemented in C++ and Python. A graphical user interface can be implemented for each plug-in and presented to the user in a compact way. The software is free and open source, and allows for rapid prototyping and testing of real-time ultrasound imaging methods in a manufacturer-agnostic fashion. The software is provided with input, output and processing plug-ins, as well as with tutorials that illustrate how to develop new plug-ins for PRETUS.
Submitted 14 September, 2021; originally announced September 2021.
MSC Class: 65-04 (Primary); 92C55 (Secondary)
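The "independent plug-ins arranged in sequence" pattern is a classic processing pipeline. The Python analogue below is schematic only: the Plugin/Denoise/MeanBrightness names and the interface are invented for this sketch and are not the PRETUS plug-in API (see the PRETUS tutorials for the real one).

# Schematic plug-in pipeline over a frame stream, in the spirit of
# PRETUS's design. All class names and the interface are invented.
class Plugin:
    def process(self, frame):
        raise NotImplementedError

class Denoise(Plugin):
    def process(self, frame):                # toy circular two-tap smoother
        return [0.5 * (a + b) for a, b in zip(frame, frame[1:] + frame[:1])]

class MeanBrightness(Plugin):
    def process(self, frame):
        print("mean:", sum(frame) / len(frame))
        return frame                         # pass-through, side effect only

pipeline = [Denoise(), MeanBrightness()]
stream = [[0.1, 0.9, 0.4, 0.6]]              # stand-in for a live US stream
for frame in stream:
    for plugin in pipeline:                  # plug-ins applied in sequence
        frame = plugin.process(frame)

Keeping each stage behind a common interface is what lets input, processing, and output plug-ins be rearranged or swapped without touching the rest of the pipeline.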
arXiv:2109.01904 [pdf, other] cs.LG cs.AI
Estimating Categorical Counterfactuals via Deep Twin Networks
Authors: Athanasios Vlontzos, Bernhard Kainz, Ciaran M. Gilligan-Lee
Abstract: Counterfactual inference is a powerful tool, capable of solving challenging problems in high-profile sectors. To perform counterfactual inference, one requires knowledge of the underlying causal mechanisms. However, causal mechanisms cannot be uniquely determined from observations and interventions alone. This raises the question of how to choose the causal mechanisms so that the resulting counterfactual inference is trustworthy in a given domain. This question has been addressed in causal models with binary variables, but the case of categorical variables remains unanswered. We address this challenge by introducing, for causal models with categorical variables, the notion of counterfactual ordering: a principle that posits desirable properties causal mechanisms should possess, and we prove that it is equivalent to specific functional constraints on the causal mechanisms. To learn causal mechanisms satisfying these constraints, and to perform counterfactual inference with them, we introduce deep twin networks. These are deep neural networks that, when trained, are capable of twin network counterfactual inference, an alternative to the abduction, action, and prediction method. We empirically test our approach on diverse real-world and semi-synthetic data from medicine, epidemiology, and finance, reporting accurate estimation of counterfactual probabilities while demonstrating the issues that arise with counterfactual reasoning when counterfactual ordering is not enforced.
Submitted 20 January, 2023; v1 submitted 4 September, 2021; originally announced September 2021.
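The twin-network construction duplicates the model into a factual and a counterfactual branch that share the same exogenous noise; intervening on the treatment in one branch and comparing outcomes gives counterfactual probabilities directly. The miniature below uses hand-written structural equations (invented for illustration, and monotone so that a counterfactual-ordering-style property holds) rather than the paper's learned deep networks.

# Twin-network counterfactual inference in miniature: two copies of the
# mechanism share exogenous noise u; one branch receives the intervention.
import numpy as np

rng = np.random.default_rng(1)
n = 100_000
u = rng.random(n)                        # shared exogenous noise

def outcome(treatment, u):               # monotone mechanism: treatment
    return u < 0.2 + 0.5 * treatment     # never flips a good outcome to bad

factual = outcome(np.zeros(n), u)        # observed branch: no treatment
counterfactual = outcome(np.ones(n), u)  # twin branch: treatment applied
# P(outcome would have been good | untreated and outcome was bad):
p = counterfactual[~factual].mean()
print(f"probability of benefit: {p:.3f}")   # = 0.5 / 0.8 = 0.625 here

Because both branches read the same u, the comparison is per-unit rather than population-level, which is exactly what distinguishes counterfactuals from plain interventional queries.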