Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script 
src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." 
aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–19 of 19 results for author: <span class="mathjax">Arasteh, S T</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" role="search"> Searching in archive <strong>cs</strong>. 
<a href="/search/?searchtype=author&query=Arasteh%2C+S+T">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Arasteh, S T"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Arasteh%2C+S+T&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level 
breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Arasteh, S T"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.19078">arXiv:2409.19078</a> <span> [<a href="https://arxiv.org/pdf/2409.19078">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Differential privacy for protecting patient data in speech disorder detection using deep learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Arasteh%2C+S+T">Soroosh Tayebi Arasteh</a>, <a href="/search/cs?searchtype=author&query=Lotfinia%2C+M">Mahshad Lotfinia</a>, <a href="/search/cs?searchtype=author&query=Perez-Toro%2C+P+A">Paula Andrea Perez-Toro</a>, <a href="/search/cs?searchtype=author&query=Arias-Vergara%2C+T">Tomas 
Arias-Vergara</a>, <a href="/search/cs?searchtype=author&query=Orozco-Arroyave%2C+J+R">Juan Rafael Orozco-Arroyave</a>, <a href="/search/cs?searchtype=author&query=Schuster%2C+M">Maria Schuster</a>, <a href="/search/cs?searchtype=author&query=Maier%2C+A">Andreas Maier</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+S+H">Seung Hee Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.19078v1-abstract-short" style="display: inline;"> Speech pathology has impacts on communication abilities and quality of life. While deep learning-based models have shown potential in diagnosing these disorders, the use of sensitive data raises critical privacy concerns. Although differential privacy (DP) has been explored in the medical imaging domain, its application in pathological speech analysis remains largely unexplored despite the equally… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19078v1-abstract-full').style.display = 'inline'; document.getElementById('2409.19078v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.19078v1-abstract-full" style="display: none;"> Speech pathology has impacts on communication abilities and quality of life. While deep learning-based models have shown potential in diagnosing these disorders, the use of sensitive data raises critical privacy concerns. Although differential privacy (DP) has been explored in the medical imaging domain, its application in pathological speech analysis remains largely unexplored despite the equally critical privacy concerns. This study is the first to investigate DP's impact on pathological speech data, focusing on the trade-offs between privacy, diagnostic accuracy, and fairness. 
Using a large, real-world dataset of 200 hours of recordings from 2,839 German-speaking participants, we observed a maximum accuracy reduction of 3.85% when training with DP with a privacy budget, denoted by ε, of 7.51. To generalize our findings, we validated our approach on a smaller dataset of Spanish-speaking Parkinson's disease patients, demonstrating that careful pretraining on large-scale task-specific datasets can maintain or even improve model accuracy under DP constraints. We also conducted a comprehensive fairness analysis, revealing that reasonable privacy levels (2&lt;ε&lt;10) do not introduce significant gender bias, though age-related disparities may require further attention. Our results suggest that DP can effectively balance privacy and utility in speech disorder detection, but also highlight the unique challenges in the speech domain, particularly regarding the privacy-fairness trade-off. This provides a foundation for future work to refine DP methodologies and address fairness across diverse patient groups in real-world deployments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19078v1-abstract-full').style.display = 'none'; document.getElementById('2409.19078v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.15621">arXiv:2407.15621</a> <span> [<a href="https://arxiv.org/pdf/2407.15621">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> RadioRAG: Factual Large Language Models for Enhanced Diagnostics in Radiology Using Dynamic Retrieval Augmented Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Arasteh%2C+S+T">Soroosh Tayebi Arasteh</a>, <a href="/search/cs?searchtype=author&query=Lotfinia%2C+M">Mahshad Lotfinia</a>, <a href="/search/cs?searchtype=author&query=Bressem%2C+K">Keno Bressem</a>, <a href="/search/cs?searchtype=author&query=Siepmann%2C+R">Robert Siepmann</a>, <a href="/search/cs?searchtype=author&query=Ferber%2C+D">Dyke Ferber</a>, <a href="/search/cs?searchtype=author&query=Kuhl%2C+C">Christiane Kuhl</a>, <a href="/search/cs?searchtype=author&query=Kather%2C+J+N">Jakob Nikolas Kather</a>, <a href="/search/cs?searchtype=author&query=Nebelung%2C+S">Sven Nebelung</a>, <a href="/search/cs?searchtype=author&query=Truhn%2C+D">Daniel Truhn</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.15621v1-abstract-short" style="display: inline;"> Large language models (LLMs) have advanced the field of artificial intelligence (AI) in medicine. However LLMs often generate outdated or inaccurate information based on static training datasets. 
Retrieval augmented generation (RAG) mitigates this by integrating outside data sources. While previous RAG systems used pre-assembled, fixed databases with limited flexibility, we have developed Radiolog… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.15621v1-abstract-full').style.display = 'inline'; document.getElementById('2407.15621v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.15621v1-abstract-full" style="display: none;"> Large language models (LLMs) have advanced the field of artificial intelligence (AI) in medicine. However LLMs often generate outdated or inaccurate information based on static training datasets. Retrieval augmented generation (RAG) mitigates this by integrating outside data sources. While previous RAG systems used pre-assembled, fixed databases with limited flexibility, we have developed Radiology RAG (RadioRAG) as an end-to-end framework that retrieves data from authoritative radiologic online sources in real-time. RadioRAG is evaluated using a dedicated radiologic question-and-answer dataset (RadioQA). We evaluate the diagnostic accuracy of various LLMs when answering radiology-specific questions with and without access to additional online information via RAG. Using 80 questions from RSNA Case Collection across radiologic subspecialties and 24 additional expert-curated questions, for which the correct gold-standard answers were available, LLMs (GPT-3.5-turbo, GPT-4, Mistral-7B, Mixtral-8x7B, and Llama3 [8B and 70B]) were prompted with and without RadioRAG. RadioRAG retrieved context-specific information from www.radiopaedia.org in real-time and incorporated them into its reply. RadioRAG consistently improved diagnostic accuracy across all LLMs, with relative improvements ranging from 2% to 54%. 
It matched or exceeded question answering without RAG across radiologic subspecialties, particularly in breast imaging and emergency radiology. However, degree of improvement varied among models; GPT-3.5-turbo and Mixtral-8x7B-instruct-v0.1 saw notable gains, while Mistral-7B-instruct-v0.2 showed no improvement, highlighting variability in its effectiveness. LLMs benefit when provided access to domain-specific data beyond their training data. For radiology, RadioRAG establishes a robust framework that substantially improves diagnostic accuracy and factuality in radiological question answering. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.15621v1-abstract-full').style.display = 'none'; document.getElementById('2407.15621v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.08064">arXiv:2404.08064</a> <span> [<a href="https://arxiv.org/pdf/2404.08064">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1038/s43856-024-00609-5">10.1038/s43856-024-00609-5 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> The Impact of Speech Anonymization on Pathology and Its Limits </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Arasteh%2C+S+T">Soroosh Tayebi Arasteh</a>, <a href="/search/cs?searchtype=author&query=Arias-Vergara%2C+T">Tomas Arias-Vergara</a>, <a href="/search/cs?searchtype=author&query=Perez-Toro%2C+P+A">Paula Andrea Perez-Toro</a>, <a href="/search/cs?searchtype=author&query=Weise%2C+T">Tobias Weise</a>, <a href="/search/cs?searchtype=author&query=Packhaeuser%2C+K">Kai Packhaeuser</a>, <a href="/search/cs?searchtype=author&query=Schuster%2C+M">Maria Schuster</a>, <a href="/search/cs?searchtype=author&query=Noeth%2C+E">Elmar Noeth</a>, <a href="/search/cs?searchtype=author&query=Maier%2C+A">Andreas Maier</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+S+H">Seung Hee Yang</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.08064v4-abstract-short" style="display: inline;"> Integration of speech into healthcare has intensified privacy concerns due to its potential as a non-invasive biomarker containing individual biometric information. In response, speaker anonymization aims to conceal personally identifiable information while retaining crucial linguistic content. However, the application of anonymization techniques to pathological speech, a critical area where priva… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.08064v4-abstract-full').style.display = 'inline'; document.getElementById('2404.08064v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.08064v4-abstract-full" style="display: none;"> Integration of speech into healthcare has intensified privacy concerns due to its potential as a non-invasive biomarker containing individual biometric information. In response, speaker anonymization aims to conceal personally identifiable information while retaining crucial linguistic content. However, the application of anonymization techniques to pathological speech, a critical area where privacy is especially vital, has not been extensively examined. This study investigates anonymization's impact on pathological speech across over 2,700 speakers from multiple German institutions, focusing on privacy, pathological utility, and demographic fairness. We explore both deep-learning-based and signal processing-based anonymization methods. We document substantial privacy improvements across disorders-evidenced by equal error rate increases up to 1933%, with minimal overall impact on utility. Specific disorders such as Dysarthria, Dysphonia, and Cleft Lip and Palate experience minimal utility changes, while Dysglossia shows slight improvements. 
Our findings underscore that the impact of anonymization varies substantially across different disorders. This necessitates disorder-specific anonymization strategies to optimally balance privacy with diagnostic utility. Additionally, our fairness analysis reveals consistent anonymization effects across most of the demographics. This study demonstrates the effectiveness of anonymization in pathological speech for enhancing privacy, while also highlighting the importance of customized and disorder-specific approaches to account for inversion attacks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.08064v4-abstract-full').style.display = 'none'; document.getElementById('2404.08064v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in Communications Medicine</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Commun Med 4, (2024) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.00757">arXiv:2310.00757</a> <span> [<a href="https://arxiv.org/pdf/2310.00757">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1038/s41598-023-49956-8">10.1038/s41598-023-49956-8 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Mind the Gap: Federated Learning Broadens Domain Generalization in Diagnostic AI Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Arasteh%2C+S+T">Soroosh Tayebi Arasteh</a>, <a href="/search/cs?searchtype=author&query=Kuhl%2C+C">Christiane Kuhl</a>, <a href="/search/cs?searchtype=author&query=Saehn%2C+M">Marwin-Jonathan Saehn</a>, <a href="/search/cs?searchtype=author&query=Isfort%2C+P">Peter Isfort</a>, <a href="/search/cs?searchtype=author&query=Truhn%2C+D">Daniel Truhn</a>, <a 
href="/search/cs?searchtype=author&query=Nebelung%2C+S">Sven Nebelung</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.00757v2-abstract-short" style="display: inline;"> Developing robust artificial intelligence (AI) models that generalize well to unseen datasets is challenging and usually requires large and variable datasets, preferably from multiple institutions. In federated learning (FL), a model is trained collaboratively at numerous sites that hold local datasets without exchanging them. So far, the impact of training strategy, i.e., local versus collaborati… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.00757v2-abstract-full').style.display = 'inline'; document.getElementById('2310.00757v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.00757v2-abstract-full" style="display: none;"> Developing robust artificial intelligence (AI) models that generalize well to unseen datasets is challenging and usually requires large and variable datasets, preferably from multiple institutions. In federated learning (FL), a model is trained collaboratively at numerous sites that hold local datasets without exchanging them. So far, the impact of training strategy, i.e., local versus collaborative, on the diagnostic on-domain and off-domain performance of AI models interpreting chest radiographs has not been assessed. Consequently, using 610,000 chest radiographs from five institutions across the globe, we assessed diagnostic performance as a function of training strategy (i.e., local vs. collaborative), network architecture (i.e., convolutional vs. transformer-based), generalization performance (i.e., on-domain vs. 
off-domain), imaging finding (i.e., cardiomegaly, pleural effusion, pneumonia, atelectasis, consolidation, pneumothorax, and no abnormality), dataset size (i.e., from n=18,000 to 213,921 radiographs), and dataset diversity. Large datasets not only showed minimal performance gains with FL but, in some instances, even exhibited decreases. In contrast, smaller datasets revealed marked improvements. Thus, on-domain performance was mainly driven by training data size. However, off-domain performance leaned more on training diversity. When trained collaboratively across diverse external institutions, AI models consistently surpassed models trained locally for off-domain tasks, emphasizing FL's potential in leveraging data diversity. In conclusion, FL can bolster diagnostic privacy, reproducibility, and off-domain reliability of AI models and, potentially, optimize healthcare outcomes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.00757v2-abstract-full').style.display = 'none'; document.getElementById('2310.00757v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in Nature Scientific Reports</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Sci Rep 13, 22576 (2023) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.14120">arXiv:2308.14120</a> <span> [<a href="https://arxiv.org/pdf/2308.14120">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1038/s41467-024-45879-8">10.1038/s41467-024-45879-8 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Large Language Models Streamline Automated Machine Learning for Clinical Studies </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Arasteh%2C+S+T">Soroosh Tayebi Arasteh</a>, <a href="/search/cs?searchtype=author&query=Han%2C+T">Tianyu Han</a>, <a href="/search/cs?searchtype=author&query=Lotfinia%2C+M">Mahshad Lotfinia</a>, <a href="/search/cs?searchtype=author&query=Kuhl%2C+C">Christiane Kuhl</a>, <a href="/search/cs?searchtype=author&query=Kather%2C+J+N">Jakob Nikolas Kather</a>, <a href="/search/cs?searchtype=author&query=Truhn%2C+D">Daniel Truhn</a>, <a 
href="/search/cs?searchtype=author&query=Nebelung%2C+S">Sven Nebelung</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.14120v5-abstract-short" style="display: inline;"> A knowledge gap persists between machine learning (ML) developers (e.g., data scientists) and practitioners (e.g., clinicians), hampering the full utilization of ML for clinical data analysis. We investigated the potential of the ChatGPT Advanced Data Analysis (ADA), an extension of GPT-4, to bridge this gap and perform ML analyses efficiently. Real-world clinical datasets and study details from l… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.14120v5-abstract-full').style.display = 'inline'; document.getElementById('2308.14120v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.14120v5-abstract-full" style="display: none;"> A knowledge gap persists between machine learning (ML) developers (e.g., data scientists) and practitioners (e.g., clinicians), hampering the full utilization of ML for clinical data analysis. We investigated the potential of the ChatGPT Advanced Data Analysis (ADA), an extension of GPT-4, to bridge this gap and perform ML analyses efficiently. Real-world clinical datasets and study details from large trials across various medical specialties were presented to ChatGPT ADA without specific guidance. ChatGPT ADA autonomously developed state-of-the-art ML models based on the original study's training data to predict clinical outcomes such as cancer development, cancer progression, disease complications, or biomarkers such as pathogenic gene sequences. 
Following the re-implementation and optimization of the published models, the head-to-head comparison of the ChatGPT ADA-crafted ML models and their respective manually crafted counterparts revealed no significant differences in traditional performance metrics (P>0.071). Strikingly, the ChatGPT ADA-crafted ML models often outperformed their counterparts. In conclusion, ChatGPT ADA offers a promising avenue to democratize ML in medicine by simplifying complex data analyses, yet should enhance, not replace, specialized training and resources, to promote broader applications in medical research and practice. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.14120v5-abstract-full').style.display = 'none'; document.getElementById('2308.14120v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in Nature Communications</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Nat Commun 15, 1603 (2024) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.07688">arXiv:2308.07688</a> <span> [<a href="https://arxiv.org/pdf/2308.07688">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1186/s41747-023-00411-3">10.1186/s41747-023-00411-3 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Enhancing Network Initialization for Medical AI Models Using Large-Scale, Unlabeled Natural Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Arasteh%2C+S+T">Soroosh Tayebi Arasteh</a>, <a href="/search/cs?searchtype=author&query=Misera%2C+L">Leo Misera</a>, <a href="/search/cs?searchtype=author&query=Kather%2C+J+N">Jakob Nikolas Kather</a>, <a href="/search/cs?searchtype=author&query=Truhn%2C+D">Daniel Truhn</a>, <a href="/search/cs?searchtype=author&query=Nebelung%2C+S">Sven Nebelung</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.07688v5-abstract-short" style="display: inline;"> Pre-training datasets, like ImageNet, have become the gold standard in medical image analysis. However, the emergence of self-supervised learning (SSL), which leverages unlabeled data to learn robust features, presents an opportunity to bypass the intensive labeling process. In this study, we explored if SSL for pre-training on non-medical images can be applied to chest radiographs and how it comp… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.07688v5-abstract-full').style.display = 'inline'; document.getElementById('2308.07688v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.07688v5-abstract-full" style="display: none;"> Pre-training datasets, like ImageNet, have become the gold standard in medical image analysis. However, the emergence of self-supervised learning (SSL), which leverages unlabeled data to learn robust features, presents an opportunity to bypass the intensive labeling process. In this study, we explored if SSL for pre-training on non-medical images can be applied to chest radiographs and how it compares to supervised pre-training on non-medical images and on medical images. We utilized a vision transformer and initialized its weights based on (i) SSL pre-training on natural images (DINOv2), (ii) SL pre-training on natural images (ImageNet dataset), and (iii) SL pre-training on chest radiographs from the MIMIC-CXR database. We tested our approach on over 800,000 chest radiographs from six large global datasets, diagnosing more than 20 different imaging findings. Our SSL pre-training on curated images not only outperformed ImageNet-based pre-training (P&lt;0.001 for all datasets) but, in certain cases, also exceeded SL on the MIMIC-CXR dataset. 
Our findings suggest that selecting the right pre-training strategy, especially with SSL, can be pivotal for improving artificial intelligence (AI)'s diagnostic accuracy in medical imaging. By demonstrating the promise of SSL in chest radiograph analysis, we underline a transformative shift towards more efficient and accurate AI models in medical imaging. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.07688v5-abstract-full').style.display = 'none'; document.getElementById('2308.07688v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in European Radiology Experimental</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Eur Radiol Exp 8, 10 (2024) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.06503">arXiv:2306.06503</a> <span> [<a href="https://arxiv.org/pdf/2306.06503">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> <div 
class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1148/ryai.230212">10.1148/ryai.230212 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Preserving privacy in domain transfer of medical AI models comes at no performance costs: The integral role of differential privacy </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Arasteh%2C+S+T">Soroosh Tayebi Arasteh</a>, <a href="/search/cs?searchtype=author&query=Lotfinia%2C+M">Mahshad Lotfinia</a>, <a href="/search/cs?searchtype=author&query=Nolte%2C+T">Teresa Nolte</a>, <a href="/search/cs?searchtype=author&query=Saehn%2C+M">Marwin Saehn</a>, <a href="/search/cs?searchtype=author&query=Isfort%2C+P">Peter Isfort</a>, <a href="/search/cs?searchtype=author&query=Kuhl%2C+C">Christiane Kuhl</a>, <a href="/search/cs?searchtype=author&query=Nebelung%2C+S">Sven Nebelung</a>, <a href="/search/cs?searchtype=author&query=Kaissis%2C+G">Georgios Kaissis</a>, <a href="/search/cs?searchtype=author&query=Truhn%2C+D">Daniel Truhn</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.06503v2-abstract-short" style="display: inline;"> Developing robust and effective artificial intelligence (AI) models in medicine requires access to large amounts of patient data. The use of AI models solely trained on large multi-institutional datasets can help with this, yet the imperative to ensure data privacy remains, particularly as membership inference risks breaching patient confidentiality. 
As a proposed remedy, we advocate for the integ… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.06503v2-abstract-full').style.display = 'inline'; document.getElementById('2306.06503v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.06503v2-abstract-full" style="display: none;"> Developing robust and effective artificial intelligence (AI) models in medicine requires access to large amounts of patient data. The use of AI models solely trained on large multi-institutional datasets can help with this, yet the imperative to ensure data privacy remains, particularly as membership inference risks breaching patient confidentiality. As a proposed remedy, we advocate for the integration of differential privacy (DP). We specifically investigate the performance of models trained with DP as compared to models trained without DP on data from institutions that the model had not seen during its training (i.e., external validation) - the situation that is reflective of the clinical use of AI models. By leveraging more than 590,000 chest radiographs from five institutions, we evaluated the efficacy of DP-enhanced domain transfer (DP-DT) in diagnosing cardiomegaly, pleural effusion, pneumonia, atelectasis, and in identifying healthy subjects. We juxtaposed DP-DT with non-DP-DT and examined diagnostic accuracy and demographic fairness using the area under the receiver operating characteristic curve (AUC) as the main metric, as well as accuracy, sensitivity, and specificity. Our results show that DP-DT, even with exceptionally high privacy levels (epsilon around 1), performs comparably to non-DP-DT (P>0.119 across all domains). Furthermore, DP-DT led to marginal AUC differences - less than 1% - for nearly all subgroups, relative to non-DP-DT. 
Despite consistent evidence suggesting that DP models induce significant performance degradation for on-domain applications, we show that off-domain performance is almost not affected. Therefore, we ardently advocate for the adoption of DP in training diagnostic medical AI models, given its minimal impact on performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.06503v2-abstract-full').style.display = 'none'; document.getElementById('2306.06503v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in Radiology: Artificial Intelligence. 
RSNA</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Radiology: Artificial Intelligence, 2024, 6(1), e230212 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.11284">arXiv:2305.11284</a> <span> [<a href="https://arxiv.org/pdf/2305.11284">pdf</a>, <a href="https://arxiv.org/format/2305.11284">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.21437/Interspeech.2023-2108">10.21437/Interspeech.2023-2108 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Federated learning for secure development of AI models for Parkinson's disease detection using speech from different languages </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Arasteh%2C+S+T">Soroosh Tayebi Arasteh</a>, <a href="/search/cs?searchtype=author&query=Rios-Urrego%2C+C+D">Cristian David Rios-Urrego</a>, <a href="/search/cs?searchtype=author&query=Noeth%2C+E">Elmar Noeth</a>, <a href="/search/cs?searchtype=author&query=Maier%2C+A">Andreas Maier</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+S+H">Seung Hee Yang</a>, <a href="/search/cs?searchtype=author&query=Rusz%2C+J">Jan Rusz</a>, <a href="/search/cs?searchtype=author&query=Orozco-Arroyave%2C+J+R">Juan Rafael 
Orozco-Arroyave</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.11284v2-abstract-short" style="display: inline;"> Parkinson's disease (PD) is a neurological disorder impacting a person's speech. Among automatic PD assessment methods, deep learning models have gained particular interest. Recently, the community has explored cross-pathology and cross-language models which can improve diagnostic accuracy even further. However, strict patient data privacy regulations largely prevent institutions from sharing pati… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.11284v2-abstract-full').style.display = 'inline'; document.getElementById('2305.11284v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.11284v2-abstract-full" style="display: none;"> Parkinson's disease (PD) is a neurological disorder impacting a person's speech. Among automatic PD assessment methods, deep learning models have gained particular interest. Recently, the community has explored cross-pathology and cross-language models which can improve diagnostic accuracy even further. However, strict patient data privacy regulations largely prevent institutions from sharing patient speech data with each other. In this paper, we employ federated learning (FL) for PD detection using speech signals from 3 real-world language corpora of German, Spanish, and Czech, each from a separate institution. Our results indicate that the FL model outperforms all the local models in terms of diagnostic accuracy, while not performing very differently from the model based on centrally combined training sets, with the advantage of not requiring any data sharing among collaborators. This will simplify inter-institutional collaborations, resulting in enhancement of patient outcomes. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.11284v2-abstract-full').style.display = 'none'; document.getElementById('2305.11284v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">INTERSPEECH 2023, pp. 5003--5007, Dublin, Ireland</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> INTERSPEECH 2023 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.08972">arXiv:2304.08972</a> <span> [<a href="https://arxiv.org/pdf/2304.08972">pdf</a>, <a href="https://arxiv.org/format/2304.08972">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1038/s41598-023-41331-x">10.1038/s41598-023-41331-x <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Fibroglandular Tissue Segmentation in Breast MRI using Vision Transformers -- A multi-institutional evaluation </p> <p class="authors"> <span 
class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=M%C3%BCller-Franzes%2C+G">Gustav Müller-Franzes</a>, <a href="/search/cs?searchtype=author&query=M%C3%BCller-Franzes%2C+F">Fritz Müller-Franzes</a>, <a href="/search/cs?searchtype=author&query=Huck%2C+L">Luisa Huck</a>, <a href="/search/cs?searchtype=author&query=Raaff%2C+V">Vanessa Raaff</a>, <a href="/search/cs?searchtype=author&query=Kemmer%2C+E">Eva Kemmer</a>, <a href="/search/cs?searchtype=author&query=Khader%2C+F">Firas Khader</a>, <a href="/search/cs?searchtype=author&query=Arasteh%2C+S+T">Soroosh Tayebi Arasteh</a>, <a href="/search/cs?searchtype=author&query=Nolte%2C+T">Teresa Nolte</a>, <a href="/search/cs?searchtype=author&query=Kather%2C+J+N">Jakob Nikolas Kather</a>, <a href="/search/cs?searchtype=author&query=Nebelung%2C+S">Sven Nebelung</a>, <a href="/search/cs?searchtype=author&query=Kuhl%2C+C">Christiane Kuhl</a>, <a href="/search/cs?searchtype=author&query=Truhn%2C+D">Daniel Truhn</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.08972v1-abstract-short" style="display: inline;"> Accurate and automatic segmentation of fibroglandular tissue in breast MRI screening is essential for the quantification of breast density and background parenchymal enhancement. 
In this retrospective study, we developed and evaluated a transformer-based neural network for breast segmentation (TraBS) in multi-institutional MRI data, and compared its performance to the well established convolutiona… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.08972v1-abstract-full').style.display = 'inline'; document.getElementById('2304.08972v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.08972v1-abstract-full" style="display: none;"> Accurate and automatic segmentation of fibroglandular tissue in breast MRI screening is essential for the quantification of breast density and background parenchymal enhancement. In this retrospective study, we developed and evaluated a transformer-based neural network for breast segmentation (TraBS) in multi-institutional MRI data, and compared its performance to the well established convolutional neural network nnUNet. TraBS and nnUNet were trained and tested on 200 internal and 40 external breast MRI examinations using manual segmentations generated by experienced human readers. Segmentation performance was assessed in terms of the Dice score and the average symmetric surface distance. The Dice score for nnUNet was lower than for TraBS on the internal testset (0.909$\pm$0.069 versus 0.916$\pm$0.067, P&lt;0.001) and on the external testset (0.824$\pm$0.144 versus 0.864$\pm$0.081, P=0.004). Moreover, the average symmetric surface distance was higher (=worse) for nnUNet than for TraBS on the internal (0.657$\pm$2.856 versus 0.548$\pm$2.195, P=0.001) and on the external testset (0.727$\pm$0.620 versus 0.584$\pm$0.413, P=0.03). Our study demonstrates that transformer-based networks improve the quality of fibroglandular tissue segmentation in breast MRI compared to convolutional-based models like nnUNet. 
These findings might help to enhance the accuracy of breast density and parenchymal enhancement quantification in breast MRI screening. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.08972v1-abstract-full').style.display = 'none'; document.getElementById('2304.08972v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Sci Rep 13, 14207 (2023) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.01622">arXiv:2302.01622</a> <span> [<a href="https://arxiv.org/pdf/2302.01622">pdf</a>, <a href="https://arxiv.org/format/2302.01622">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1038/s43856-024-00462-6">10.1038/s43856-024-00462-6 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title 
is-5 mathjax"> Private, fair and accurate: Training large-scale, privacy-preserving AI models in medical imaging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Arasteh%2C+S+T">Soroosh Tayebi Arasteh</a>, <a href="/search/cs?searchtype=author&query=Ziller%2C+A">Alexander Ziller</a>, <a href="/search/cs?searchtype=author&query=Kuhl%2C+C">Christiane Kuhl</a>, <a href="/search/cs?searchtype=author&query=Makowski%2C+M">Marcus Makowski</a>, <a href="/search/cs?searchtype=author&query=Nebelung%2C+S">Sven Nebelung</a>, <a href="/search/cs?searchtype=author&query=Braren%2C+R">Rickmer Braren</a>, <a href="/search/cs?searchtype=author&query=Rueckert%2C+D">Daniel Rueckert</a>, <a href="/search/cs?searchtype=author&query=Truhn%2C+D">Daniel Truhn</a>, <a href="/search/cs?searchtype=author&query=Kaissis%2C+G">Georgios Kaissis</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.01622v5-abstract-short" style="display: inline;"> Artificial intelligence (AI) models are increasingly used in the medical domain. However, as medical data is highly sensitive, special precautions to ensure its protection are required. The gold standard for privacy preservation is the introduction of differential privacy (DP) to model training. Prior work indicates that DP has negative implications on model accuracy and fairness, which are unacce… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.01622v5-abstract-full').style.display = 'inline'; document.getElementById('2302.01622v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.01622v5-abstract-full" style="display: none;"> Artificial intelligence (AI) models are increasingly used in the medical domain. 
However, as medical data is highly sensitive, special precautions to ensure its protection are required. The gold standard for privacy preservation is the introduction of differential privacy (DP) to model training. Prior work indicates that DP has negative implications on model accuracy and fairness, which are unacceptable in medicine and represent a main barrier to the widespread use of privacy-preserving techniques. In this work, we evaluated the effect of privacy-preserving training of AI models regarding accuracy and fairness compared to non-private training. For this, we used two datasets: (1) A large dataset (N=193,311) of high quality clinical chest radiographs, and (2) a dataset (N=1,625) of 3D abdominal computed tomography (CT) images, with the task of classifying the presence of pancreatic ductal adenocarcinoma (PDAC). Both were retrospectively collected and manually labeled by experienced radiologists. We then compared non-private deep convolutional neural networks (CNNs) and privacy-preserving (DP) models with respect to privacy-utility trade-offs measured as area under the receiver-operator-characteristic curve (AUROC), and privacy-fairness trade-offs, measured as Pearson's r or Statistical Parity Difference. We found that, while the privacy-preserving trainings yielded lower accuracy, they did largely not amplify discrimination against age, sex or co-morbidity. Our study shows that -- under the challenging realistic circumstances of a real-life clinical dataset -- the privacy-preserving training of diagnostic deep learning models is possible with excellent diagnostic accuracy and fairness. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.01622v5-abstract-full').style.display = 'none'; document.getElementById('2302.01622v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in Communications Medicine. Nature Portfolio</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Commun Med 4(1), 46 (2024) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.09162">arXiv:2212.09162</a> <span> [<a href="https://arxiv.org/pdf/2212.09162">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Medical Diagnosis with Large Scale Multimodal Transformers: Leveraging Diverse Data for More Accurate Diagnosis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Khader%2C+F">Firas Khader</a>, <a href="/search/cs?searchtype=author&query=Mueller-Franzes%2C+G">Gustav Mueller-Franzes</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tianci Wang</a>, <a href="/search/cs?searchtype=author&query=Han%2C+T">Tianyu Han</a>, <a href="/search/cs?searchtype=author&query=Arasteh%2C+S+T">Soroosh 
Tayebi Arasteh</a>, <a href="/search/cs?searchtype=author&query=Haarburger%2C+C">Christoph Haarburger</a>, <a href="/search/cs?searchtype=author&query=Stegmaier%2C+J">Johannes Stegmaier</a>, <a href="/search/cs?searchtype=author&query=Bressem%2C+K">Keno Bressem</a>, <a href="/search/cs?searchtype=author&query=Kuhl%2C+C">Christiane Kuhl</a>, <a href="/search/cs?searchtype=author&query=Nebelung%2C+S">Sven Nebelung</a>, <a href="/search/cs?searchtype=author&query=Kather%2C+J+N">Jakob Nikolas Kather</a>, <a href="/search/cs?searchtype=author&query=Truhn%2C+D">Daniel Truhn</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.09162v2-abstract-short" style="display: inline;"> Multimodal deep learning has been used to predict clinical endpoints and diagnoses from clinical routine data. However, these models suffer from scaling issues: they have to learn pairwise interactions between each piece of information in each data type, thereby escalating model complexity beyond manageable scales. This has so far precluded a widespread use of multimodal deep learning. Here, we pr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.09162v2-abstract-full').style.display = 'inline'; document.getElementById('2212.09162v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.09162v2-abstract-full" style="display: none;"> Multimodal deep learning has been used to predict clinical endpoints and diagnoses from clinical routine data. However, these models suffer from scaling issues: they have to learn pairwise interactions between each piece of information in each data type, thereby escalating model complexity beyond manageable scales. This has so far precluded a widespread use of multimodal deep learning. 
Here, we present a new technical approach of "learnable synergies", in which the model only selects relevant interactions between data modalities and keeps an "internal memory" of relevant data. Our approach is easily scalable and naturally adapts to multimodal data inputs from clinical routine. We demonstrate this approach on three large multimodal datasets from radiology and ophthalmology and show that it outperforms state-of-the-art models in clinically relevant diagnosis tasks. Our new approach is transferable and will allow the application of multimodal deep learning to a broad set of clinically relevant problems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.09162v2-abstract-full').style.display = 'none'; document.getElementById('2212.09162v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.07501">arXiv:2212.07501</a> <span> [<a href="https://arxiv.org/pdf/2212.07501">pdf</a>, <a href="https://arxiv.org/format/2212.07501">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1038/s41598-023-39278-0">10.1038/s41598-023-39278-0 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Diffusion Probabilistic Models beat GANs on Medical Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=M%C3%BCller-Franzes%2C+G">Gustav Müller-Franzes</a>, <a href="/search/cs?searchtype=author&query=Niehues%2C+J+M">Jan Moritz Niehues</a>, <a href="/search/cs?searchtype=author&query=Khader%2C+F">Firas Khader</a>, <a href="/search/cs?searchtype=author&query=Arasteh%2C+S+T">Soroosh Tayebi Arasteh</a>, <a href="/search/cs?searchtype=author&query=Haarburger%2C+C">Christoph Haarburger</a>, <a href="/search/cs?searchtype=author&query=Kuhl%2C+C">Christiane Kuhl</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tianci Wang</a>, <a href="/search/cs?searchtype=author&query=Han%2C+T">Tianyu Han</a>, <a href="/search/cs?searchtype=author&query=Nebelung%2C+S">Sven Nebelung</a>, <a href="/search/cs?searchtype=author&query=Kather%2C+J+N">Jakob Nikolas Kather</a>, <a href="/search/cs?searchtype=author&query=Truhn%2C+D">Daniel Truhn</a> </p> <p class="abstract 
mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.07501v1-abstract-short" style="display: inline;"> The success of Deep Learning applications critically depends on the quality and scale of the underlying training data. Generative adversarial networks (GANs) can generate arbitrary large datasets, but diversity and fidelity are limited, which has recently been addressed by denoising diffusion probabilistic models (DDPMs) whose superiority has been demonstrated on natural images. In this study, we… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.07501v1-abstract-full').style.display = 'inline'; document.getElementById('2212.07501v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.07501v1-abstract-full" style="display: none;"> The success of Deep Learning applications critically depends on the quality and scale of the underlying training data. Generative adversarial networks (GANs) can generate arbitrary large datasets, but diversity and fidelity are limited, which has recently been addressed by denoising diffusion probabilistic models (DDPMs) whose superiority has been demonstrated on natural images. In this study, we propose Medfusion, a conditional latent DDPM for medical images. We compare our DDPM-based model against GAN-based models, which constitute the current state-of-the-art in the medical domain. Medfusion was trained and compared with (i) StyleGan-3 on n=101,442 images from the AIROGS challenge dataset to generate fundoscopies with and without glaucoma, (ii) ProGAN on n=191,027 from the CheXpert dataset to generate radiographs with and without cardiomegaly and (iii) wGAN on n=19,557 images from the CRCMS dataset to generate histopathological images with and without microsatellite stability. 
In the AIROGS, CRCMS, and CheXpert datasets, Medfusion achieved lower (=better) FID than the GANs (11.63 versus 20.43, 30.03 versus 49.26, and 17.28 versus 84.31). Also, fidelity (precision) and diversity (recall) were higher (=better) for Medfusion in all three datasets. Our study shows that DDPM are a superior alternative to GANs for image synthesis in the medical domain. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.07501v1-abstract-full').style.display = 'none'; document.getElementById('2212.07501v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Sci Rep 13, 12098 (2023) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.13606">arXiv:2211.13606</a> <span> [<a href="https://arxiv.org/pdf/2211.13606">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1038/s41598-023-33303-y">10.1038/s41598-023-33303-y <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Collaborative Training of Medical 
Artificial Intelligence Models with non-uniform Labels </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Arasteh%2C+S+T">Soroosh Tayebi Arasteh</a>, <a href="/search/cs?searchtype=author&query=Isfort%2C+P">Peter Isfort</a>, <a href="/search/cs?searchtype=author&query=Saehn%2C+M">Marwin Saehn</a>, <a href="/search/cs?searchtype=author&query=Mueller-Franzes%2C+G">Gustav Mueller-Franzes</a>, <a href="/search/cs?searchtype=author&query=Khader%2C+F">Firas Khader</a>, <a href="/search/cs?searchtype=author&query=Kather%2C+J+N">Jakob Nikolas Kather</a>, <a href="/search/cs?searchtype=author&query=Kuhl%2C+C">Christiane Kuhl</a>, <a href="/search/cs?searchtype=author&query=Nebelung%2C+S">Sven Nebelung</a>, <a href="/search/cs?searchtype=author&query=Truhn%2C+D">Daniel Truhn</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.13606v2-abstract-short" style="display: inline;"> Due to the rapid advancements in recent years, medical image analysis is largely dominated by deep learning (DL). However, building powerful and robust DL models requires training with large multi-party datasets. While multiple stakeholders have provided publicly available datasets, the ways in which these data are labeled vary widely. For Instance, an institution might provide a dataset of chest… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.13606v2-abstract-full').style.display = 'inline'; document.getElementById('2211.13606v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.13606v2-abstract-full" style="display: none;"> Due to the rapid advancements in recent years, medical image analysis is largely dominated by deep learning (DL). 
However, building powerful and robust DL models requires training with large multi-party datasets. While multiple stakeholders have provided publicly available datasets, the ways in which these data are labeled vary widely. For Instance, an institution might provide a dataset of chest radiographs containing labels denoting the presence of pneumonia, while another institution might have a focus on determining the presence of metastases in the lung. Training a single AI model utilizing all these data is not feasible with conventional federated learning (FL). This prompts us to propose an extension to the widespread FL process, namely flexible federated learning (FFL) for collaborative training on such data. Using 695,000 chest radiographs from five institutions from across the globe - each with differing labels - we demonstrate that having heterogeneously labeled datasets, FFL-based training leads to significant performance increase compared to conventional FL training, where only the uniformly annotated images are utilized. We believe that our proposed algorithm could accelerate the process of bringing collaborative training methods from research and simulation phase to the real-world applications in healthcare. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.13606v2-abstract-full').style.display = 'none'; document.getElementById('2211.13606v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in Nature Scientific Reports</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Sci Rep 13, 6046 (2023) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.03364">arXiv:2211.03364</a> <span> [<a href="https://arxiv.org/pdf/2211.03364">pdf</a>, <a href="https://arxiv.org/format/2211.03364">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Medical Diffusion: Denoising Diffusion Probabilistic Models for 3D Medical Image Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Khader%2C+F">Firas Khader</a>, <a href="/search/cs?searchtype=author&query=Mueller-Franzes%2C+G">Gustav Mueller-Franzes</a>, <a href="/search/cs?searchtype=author&query=Arasteh%2C+S+T">Soroosh Tayebi Arasteh</a>, <a href="/search/cs?searchtype=author&query=Han%2C+T">Tianyu Han</a>, <a href="/search/cs?searchtype=author&query=Haarburger%2C+C">Christoph Haarburger</a>, <a href="/search/cs?searchtype=author&query=Schulze-Hagen%2C+M">Maximilian Schulze-Hagen</a>, <a href="/search/cs?searchtype=author&query=Schad%2C+P">Philipp Schad</a>, <a href="/search/cs?searchtype=author&query=Engelhardt%2C+S">Sandy Engelhardt</a>, <a href="/search/cs?searchtype=author&query=Baessler%2C+B">Bettina Baessler</a>, <a 
href="/search/cs?searchtype=author&query=Foersch%2C+S">Sebastian Foersch</a>, <a href="/search/cs?searchtype=author&query=Stegmaier%2C+J">Johannes Stegmaier</a>, <a href="/search/cs?searchtype=author&query=Kuhl%2C+C">Christiane Kuhl</a>, <a href="/search/cs?searchtype=author&query=Nebelung%2C+S">Sven Nebelung</a>, <a href="/search/cs?searchtype=author&query=Kather%2C+J+N">Jakob Nikolas Kather</a>, <a href="/search/cs?searchtype=author&query=Truhn%2C+D">Daniel Truhn</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.03364v7-abstract-short" style="display: inline;"> Recent advances in computer vision have shown promising results in image generation. Diffusion probabilistic models in particular have generated realistic images from textual input, as demonstrated by DALL-E 2, Imagen and Stable Diffusion. However, their use in medicine, where image data typically comprises three-dimensional volumes, has not been systematically evaluated. Synthetic images may play… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.03364v7-abstract-full').style.display = 'inline'; document.getElementById('2211.03364v7-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.03364v7-abstract-full" style="display: none;"> Recent advances in computer vision have shown promising results in image generation. Diffusion probabilistic models in particular have generated realistic images from textual input, as demonstrated by DALL-E 2, Imagen and Stable Diffusion. However, their use in medicine, where image data typically comprises three-dimensional volumes, has not been systematically evaluated. Synthetic images may play a crucial role in privacy preserving artificial intelligence and can also be used to augment small datasets. 
Here we show that diffusion probabilistic models can synthesize high quality medical imaging data, which we show for Magnetic Resonance Images (MRI) and Computed Tomography (CT) images. We provide quantitative measurements of their performance through a reader study with two medical experts who rated the quality of the synthesized images in three categories: Realistic image appearance, anatomical correctness and consistency between slices. Furthermore, we demonstrate that synthetic images can be used in a self-supervised pre-training and improve the performance of breast segmentation models when data is scarce (dice score 0.91 vs. 0.95 without vs. with synthetic data). The code is publicly available on GitHub: https://github.com/FirasGit/medicaldiffusion. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.03364v7-abstract-full').style.display = 'none'; document.getElementById('2211.03364v7-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.06450">arXiv:2204.06450</a> <span> [<a href="https://arxiv.org/pdf/2204.06450">pdf</a>, <a href="https://arxiv.org/format/2204.06450">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1038/s41598-023-47711-7">10.1038/s41598-023-47711-7 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> The effect of speech pathology on automatic speaker verification -- a large-scale study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Arasteh%2C+S+T">Soroosh Tayebi Arasteh</a>, <a href="/search/cs?searchtype=author&query=Weise%2C+T">Tobias Weise</a>, <a href="/search/cs?searchtype=author&query=Schuster%2C+M">Maria Schuster</a>, <a href="/search/cs?searchtype=author&query=Noeth%2C+E">Elmar Noeth</a>, <a href="/search/cs?searchtype=author&query=Maier%2C+A">Andreas Maier</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+S+H">Seung Hee Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.06450v3-abstract-short" style="display: inline;"> Navigating the challenges of data-driven speech processing, one of the primary hurdles is accessing reliable pathological 
speech data. While public datasets appear to offer solutions, they come with inherent risks of potential unintended exposure of patient health information via re-identification attacks. Using a comprehensive real-world pathological speech corpus, with over n=3,800 test subjects… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.06450v3-abstract-full').style.display = 'inline'; document.getElementById('2204.06450v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.06450v3-abstract-full" style="display: none;"> Navigating the challenges of data-driven speech processing, one of the primary hurdles is accessing reliable pathological speech data. While public datasets appear to offer solutions, they come with inherent risks of potential unintended exposure of patient health information via re-identification attacks. Using a comprehensive real-world pathological speech corpus, with over n=3,800 test subjects spanning various age groups and speech disorders, we employed a deep-learning-driven automatic speaker verification (ASV) approach. This resulted in a notable mean equal error rate (EER) of 0.89% with a standard deviation of 0.06%, outstripping traditional benchmarks. Our comprehensive assessments demonstrate that pathological speech overall faces heightened privacy breach risks compared to healthy speech. Specifically, adults with dysphonia are at heightened re-identification risks, whereas conditions like dysarthria yield results comparable to those of healthy speakers. Crucially, speech intelligibility does not influence the ASV system's performance metrics. In pediatric cases, particularly those with cleft lip and palate, the recording environment plays a decisive role in re-identification. 
Merging data across pathological types led to a marked EER decrease, suggesting the potential benefits of pathological diversity in ASV, accompanied by a logarithmic boost in ASV effectiveness. In essence, this research sheds light on the dynamics between pathological speech and speaker verification, emphasizing its crucial role in safeguarding patient confidentiality in our increasingly digitized healthcare era. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.06450v3-abstract-full').style.display = 'none'; document.getElementById('2204.06450v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in Scientific Reports</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Sci Rep 13, 20476 (2023) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2104.10513">arXiv:2104.10513</a> <span> [<a href="https://arxiv.org/pdf/2104.10513">pdf</a>, <a href="https://arxiv.org/format/2104.10513">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span 
class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/ICSC50631.2021.00068">10.1109/ICSC50631.2021.00068 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> How Will Your Tweet Be Received? Predicting the Sentiment Polarity of Tweet Replies </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Arasteh%2C+S+T">Soroosh Tayebi Arasteh</a>, <a href="/search/cs?searchtype=author&query=Monajem%2C+M">Mehrpad Monajem</a>, <a href="/search/cs?searchtype=author&query=Christlein%2C+V">Vincent Christlein</a>, <a href="/search/cs?searchtype=author&query=Heinrich%2C+P">Philipp Heinrich</a>, <a href="/search/cs?searchtype=author&query=Nicolaou%2C+A">Anguelos Nicolaou</a>, <a href="/search/cs?searchtype=author&query=Boldaji%2C+H+N">Hamidreza Naderi Boldaji</a>, <a href="/search/cs?searchtype=author&query=Lotfinia%2C+M">Mahshad Lotfinia</a>, <a href="/search/cs?searchtype=author&query=Evert%2C+S">Stefan Evert</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2104.10513v1-abstract-short" style="display: inline;"> Twitter sentiment analysis, which often focuses on predicting the polarity of tweets, has attracted increasing attention over the last years, in particular with the rise of deep learning (DL). In this paper, we propose a new task: predicting the predominant sentiment among (first-order) replies to a given tweet. 
Therefore, we created RETWEET, a large dataset of tweets and replies manually annotate… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.10513v1-abstract-full').style.display = 'inline'; document.getElementById('2104.10513v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2104.10513v1-abstract-full" style="display: none;"> Twitter sentiment analysis, which often focuses on predicting the polarity of tweets, has attracted increasing attention over the last years, in particular with the rise of deep learning (DL). In this paper, we propose a new task: predicting the predominant sentiment among (first-order) replies to a given tweet. Therefore, we created RETWEET, a large dataset of tweets and replies manually annotated with sentiment labels. As a strong baseline, we propose a two-stage DL-based method: first, we create automatically labeled training data by applying a standard sentiment classifier to tweet replies and aggregating its predictions for each original tweet; our rationale is that individual errors made by the classifier are likely to cancel out in the aggregation step. Second, we use the automatically labeled data for supervised training of a neural network to predict reply sentiment from the original tweets. The resulting classifier is evaluated on the new RETWEET dataset, showing promising results, especially considering that it has been trained without any manually labeled data. Both the dataset and the baseline implementation are publicly available. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.10513v1-abstract-full').style.display = 'none'; document.getElementById('2104.10513v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in 2021 IEEE 15th International Conference on Semantic Computing (ICSC)</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> 2021 IEEE 15th International Conference on Semantic Computing (ICSC), Laguna Hills, CA, USA, 2021, pp. 356-359 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2102.02470">arXiv:2102.02470</a> <span> [<a href="https://arxiv.org/pdf/2102.02470">pdf</a>, <a href="https://arxiv.org/format/2102.02470">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> </div> </div> <p class="title is-5 mathjax"> Machine Learning-Based Generalized Model for Finite Element Analysis of Roll Deflection During the Austenitic Stainless Steel 316L Strip Rolling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lotfinia%2C+M">Mahshad Lotfinia</a>, <a href="/search/cs?searchtype=author&query=Arasteh%2C+S+T">Soroosh Tayebi Arasteh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2102.02470v2-abstract-short" style="display: inline;"> During the strip rolling process, a considerable amount of the forces of the material pressure cause elastic deformation on the work-roll, i.e., the deflection process. The uncontrollable amount of the work-roll deflection leads to the high deviations in the permissible thickness of the plate along its width. In the context of the Austenitic Stainless Steels (ASS), due to the instability of the Au… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.02470v2-abstract-full').style.display = 'inline'; document.getElementById('2102.02470v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2102.02470v2-abstract-full" style="display: none;"> During the strip rolling process, a considerable amount of the forces of the material pressure cause elastic deformation on the work-roll, i.e., the deflection process. The uncontrollable amount of the work-roll deflection leads to the high deviations in the permissible thickness of the plate along its width. In the context of the Austenitic Stainless Steels (ASS), due to the instability of the Austenite phase in a cold temperature, cold deformation leads to the production of Strain-Induced Martensite (SIM), which improves the mechanical properties. It leads to the hardening of the ASS 316L during the cold deformation, which causes the Strain-Stress curve of the ASS 316L to behave non-linearly, which distinguishes it from other categories of steels. To account for this phenomenon, we propose to utilize a Machine Learning (ML) method to predict more accurately the flow stress of the ASS 316L during the cold rolling. Furthermore, we conduct various mechanical tensile tests in order to obtain the required dataset, Stress316L, for training the neural network. 
Moreover, instead of using a constant value of flow stress during the multi-pass rolling process, we use a Finite Difference (FD) formulation of the equilibrium equation in order to account for the dynamic behavior of the flow stress, which leads to the estimation of the mean pressure, which the strip enforces to the rolls during deformation. Finally, using the Finite Element Analysis (FEA), the deflection of the work-roll tools will be calculated. As a result, we end up with a generalized model for the calculation of the roll deflection, specific to the ASS 316L. To the best of our knowledge, this is the first model for ASS 316L which considers dynamic flow stress and SIM of the rolled plate, using FEM and an ML approach, which could contribute to the better design of the tools. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.02470v2-abstract-full').style.display = 'none'; document.getElementById('2102.02470v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 February, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2021. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2011.08232">arXiv:2011.08232</a> <span> [<a href="https://arxiv.org/pdf/2011.08232">pdf</a>, <a href="https://arxiv.org/format/2011.08232">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Geometry">cs.CG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Algebraic Geometry">math.AG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/s42979-021-00770-x">10.1007/s42979-021-00770-x <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Conversion Between Cubic Bezier Curves and Catmull-Rom Splines </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Arasteh%2C+S+T">Soroosh Tayebi Arasteh</a>, <a href="/search/cs?searchtype=author&query=Kalisz%2C+A">Adam Kalisz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2011.08232v3-abstract-short" style="display: inline;"> Splines are one of the main methods of mathematically representing complicated shapes, which have become the primary technique in the fields of Computer Graphics (CG) and Computer-Aided Geometric Design (CAGD) for modeling complex surfaces. 
Among all, Bézier and Catmull-Rom splines are the most common in the sub-fields of engineering. In this paper, we focus on conversion between cubic Bézier and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2011.08232v3-abstract-full').style.display = 'inline'; document.getElementById('2011.08232v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2011.08232v3-abstract-full" style="display: none;"> Splines are one of the main methods of mathematically representing complicated shapes, which have become the primary technique in the fields of Computer Graphics (CG) and Computer-Aided Geometric Design (CAGD) for modeling complex surfaces. Among all, Bézier and Catmull-Rom splines are the most common in the sub-fields of engineering. In this paper, we focus on conversion between cubic Bézier and Catmull-Rom curve segments, rather than going through their properties. By deriving the conversion equations, we aim at converting the original set of the control points of either of the Catmull-Rom or Bézier cubic curves to a new set of control points, which corresponds to approximately the same shape as the original curve, when considered as the set of the control points of the other curve. Due to providing simple linear transformations of control points, the method is very simple, efficient, and easy to implement, which is further validated in this paper using some numerical and visual examples. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2011.08232v3-abstract-full').style.display = 'none'; document.getElementById('2011.08232v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 November, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in SN Computer Science</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> SN COMPUT. SCI. 2, 398 (2021) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2011.04896">arXiv:2011.04896</a> <span> [<a href="https://arxiv.org/pdf/2011.04896">pdf</a>, <a href="https://arxiv.org/ps/2011.04896">ps</a>, <a href="https://arxiv.org/format/2011.04896">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> An Empirical Study on Text-Independent Speaker Verification based on the GE2E Method </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Arasteh%2C+S+T">Soroosh Tayebi Arasteh</a> </p> 
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2011.04896v4-abstract-short" style="display: inline;"> While many researchers in the speaker recognition area have started to replace the former classical state-of-the-art methods with deep learning techniques, some of the traditional i-vector-based methods are still state-of-the-art in the context of text-independent speaker verification. Google's Generalized End-to-End Loss for Speaker Verification (GE2E), a deep learning-based technique using long… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2011.04896v4-abstract-full').style.display = 'inline'; document.getElementById('2011.04896v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2011.04896v4-abstract-full" style="display: none;"> While many researchers in the speaker recognition area have started to replace the former classical state-of-the-art methods with deep learning techniques, some of the traditional i-vector-based methods are still state-of-the-art in the context of text-independent speaker verification. Google's Generalized End-to-End Loss for Speaker Verification (GE2E), a deep learning-based technique using long short-term memory units, has recently gained a lot of attention due to its speed in convergence and generalization. In this study, we aim at further studying the GE2E method and comparing different scenarios in order to investigate all of its aspects. Various experiments including the effects of a random sampling of test and enrollment utterances, test utterance duration, and the number of enrollment utterances are discussed in this article. 
Furthermore, we compare the GE2E method with the baseline state-of-the-art i-vector-based methods for text-independent speaker verification and show that it outperforms them by resulting in lower error rates while being end-to-end and requiring less training time for convergence. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2011.04896v4-abstract-full').style.display = 'none'; document.getElementById('2011.04896v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 November, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 7 tables, 2 figures, 4 algorithms. An empirical study on the paper arXiv:1710.10467 by Wan et al. 
(2017)</span> </p> </li> </ol> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a 
href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 
0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository