<!-- Search | arXiv e-print repository -->

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">

  <!-- Favicons and platform icons. -->
  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png">
  <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png">
  <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png">
  <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest">
  <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b">
  <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico">
  <meta name="msapplication-TileColor" content="#b31b1b">
  <!-- NOTE(review): this is the only icon asset referenced by a relative path; every
       other one uses an absolute static-host URL. Confirm that it resolves. -->
  <meta name="msapplication-config" content="images/icons/browserconfig.xml">
  <meta name="theme-color" content="#b31b1b">

  <title>Search | arXiv e-print repository</title>
  <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script>
  <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css">

  <!-- MathJax 2 configuration. It must appear before MathJax.js is loaded so the
       loader picks it up. -->
  <script type="text/x-mathjax-config">
    MathJax.Hub.Config({
      messageStyle: "none",
      extensions: ["tex2jax.js"],
      jax: ["input/TeX", "output/HTML-CSS"],
      tex2jax: {
        // The second pair used to be ["\$","\$"], which evaluates to the same
        // '$' delimiters as the first pair. \( ... \) is the inline counterpart
        // of the \[ ... \] display delimiters configured below.
        inlineMath: [ ['$','$'], ["\\(","\\)"] ],
        displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
        processEscapes: true,
        // Ignore every element by default and typeset only those whose class
        // matches "mathjax.*".
        ignoreClass: '.*',
        processClass: 'mathjax.*'
      },
      TeX: {
        extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"],
        noErrors: {
          inlineDelimiters: ["$","$"],
          multiLine: false,
          style: { "font-size": "normal", "border": "" }
        }
      },
      "HTML-CSS": { availableFonts: ["TeX"] }
    });
  </script>
  <!-- Explicit https: a protocol-relative URL fails when the page is opened from a
       saved copy (file://). -->
  <script src="https://static.arxiv.org/MathJax-2.7.3/MathJax.js"></script>
  <script
src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script>
  <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css">
  <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css">
  <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script>
  <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script>
  <!-- NOTE(review): "radio" is not an HTML element type, so this selector can only
       match a custom <radio> element. Kept as-is; confirm the intended target. -->
  <style> radio#cf-customfield_11400 { display: none; } </style>
</head>
<body>
  <header>
    <!-- Skip link: lets keyboard and screen-reader users jump past the banner. -->
    <a href="#main-container" class="is-sr-only">Skip to main content</a>

    <!-- Attribution bar: Cornell logo on the left, sponsor acknowledgement on the right. -->
    <div class="attribution level is-marginless" role="banner">
      <div class="level-left">
        <!-- No aria-label on the image: it would override the alt text and the link
             would be announced as "logo" instead of "Cornell University". -->
        <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200"></a>
      </div>
      <div class="level-right is-marginless">
        <p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br>
          the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p>
      </div>
    </div>

    <!-- Identity bar: arXiv logo on the left, compact site-wide search on the right. -->
    <div class="identity level is-marginless">
      <div class="level-left">
        <div class="level-item">
          <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo">
            <!-- The image's own accessible name now comes from alt. -->
            <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" alt="arxiv logo" width="85" style="width:85px;">
          </a>
        </div>
      </div>
      <div class="search-block level-right">
        <form class="level-item mini-search" method="GET" action="https://arxiv.org/search">
          <div class="field has-addons">
            <div class="control">
              <input class="input is-small" type="text" name="query" placeholder="Search..."
aria-label="Search term or terms">
              <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p>
            </div>
            <div class="control">
              <div class="select is-small">
                <!-- Field selector for the header mini-search. -->
                <select name="searchtype" aria-label="Field to search">
                  <option value="all" selected>All fields</option>
                  <option value="title">Title</option>
                  <option value="author">Author</option>
                  <option value="abstract">Abstract</option>
                  <option value="comments">Comments</option>
                  <option value="journal_ref">Journal reference</option>
                  <option value="acm_class">ACM classification</option>
                  <option value="msc_class">MSC classification</option>
                  <option value="report_num">Report number</option>
                  <option value="paper_id">arXiv identifier</option>
                  <option value="doi">DOI</option>
                  <option value="orcid">ORCID</option>
                  <option value="author_id">arXiv author ID</option>
                  <option value="help">Help pages</option>
                  <option value="full_text">Full text</option>
                </select>
              </div>
            </div>
            <!-- Submitted with the query as source=header. -->
            <input type="hidden" name="source" value="header">
            <button class="button is-small is-cul-darker" type="submit">Search</button>
          </div>
        </form>
      </div>
    </div>

    <div class="container">
      <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu">
        <a href="https://arxiv.org/login">Login</a>
      </div>
    </div>
  </header>

  <!-- Target of the skip link in the header. -->
  <main class="container" id="main-container">
    <div class="level is-marginless">
      <div class="level-left">
        <h1 class="title is-clearfix">
          Showing 1–14 of 14 results for author: <span class="mathjax">Brettin, T</span>
        </h1>
      </div>
      <div class="level-right is-hidden-mobile">
        <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span>
      </div>
    </div>
    <div class="content">
      <!-- Main search form, scoped to the q-bio archive. role="search" exposes the
           search landmark; the previous "aria-role" attribute is not defined by
           WAI-ARIA and was ignored by assistive technology. -->
      <form method="GET" action="/search/q-bio" role="search">
        Searching in archive <strong>q-bio</strong>.
<a href="/search/?searchtype=author&query=Brettin%2C+T">Search in all archives.</a>

        <!-- Query row: free-text term, field selector, submit button. -->
        <div class="field has-addons-tablet">
          <div class="control is-expanded">
            <label class="hidden-label" for="query">Search term or terms</label>
            <input class="input is-medium" id="query" name="query" type="text" value="Brettin, T" placeholder="Search term...">
          </div>
          <div class="select control is-medium">
            <label class="is-hidden" for="searchtype">Field</label>
            <select class="is-medium" id="searchtype" name="searchtype">
              <option value="all">All fields</option>
              <option value="title">Title</option>
              <option value="author" selected>Author(s)</option>
              <option value="abstract">Abstract</option>
              <option value="comments">Comments</option>
              <option value="journal_ref">Journal reference</option>
              <option value="acm_class">ACM classification</option>
              <option value="msc_class">MSC classification</option>
              <option value="report_num">Report number</option>
              <option value="paper_id">arXiv identifier</option>
              <option value="doi">DOI</option>
              <option value="orcid">ORCID</option>
              <option value="license">License (URI)</option>
              <option value="author_id">arXiv author ID</option>
              <option value="help">Help pages</option>
              <option value="full_text">Full text</option>
            </select>
          </div>
          <div class="control">
            <button class="button is-link is-medium">Search</button>
          </div>
        </div>

        <!-- Abstract visibility toggle; "show" is the pre-selected choice. -->
        <div class="field">
          <div class="control is-size-7">
            <label class="radio">
              <input id="abstracts-0" name="abstracts" type="radio" value="show" checked>
              Show abstracts
            </label>
            <label class="radio">
              <input id="abstracts-1" name="abstracts" type="radio" value="hide">
              Hide abstracts
            </label>
          </div>
        </div>

        <!-- Link to the advanced search, pre-filled with the current author term,
             page size and sort order. -->
        <div class="is-clearfix" style="height: 2.5em">
          <div class="is-pulled-right">
            <a href="/search/advanced?terms-0-term=Brettin%2C+T&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a>
          </div>
        </div>

        <!-- Current sort order and page size, resubmitted with the query. -->
        <input name="order" type="hidden" value="-announced_date_first">
        <input name="size" type="hidden" value="50">
      </form>

      <div class="level
breathe-horizontal">
        <div class="level-left">
          <!-- Paging/sorting form.
               NOTE(review): this form posts to /search/ while the main search form and
               the author links use /search/q-bio, so "Go" may drop the archive scope.
               Confirm against the server routes before changing. -->
          <form method="GET" action="/search/">
            <!-- Hidden copies of the current query fields so they are resubmitted together
                 with the size/order controls. Their ids carry a "-mirror" suffix because
                 the main search form already uses "searchtype", "query", "abstracts-0"
                 and "abstracts-1", and ids must be unique within a document. The name
                 attributes, which determine what is submitted, are unchanged. -->
            <div style="display: none;">
              <select id="searchtype-mirror" name="searchtype">
                <option value="all">All fields</option>
                <option value="title">Title</option>
                <option selected value="author">Author(s)</option>
                <option value="abstract">Abstract</option>
                <option value="comments">Comments</option>
                <option value="journal_ref">Journal reference</option>
                <option value="acm_class">ACM classification</option>
                <option value="msc_class">MSC classification</option>
                <option value="report_num">Report number</option>
                <option value="paper_id">arXiv identifier</option>
                <option value="doi">DOI</option>
                <option value="orcid">ORCID</option>
                <option value="license">License (URI)</option>
                <option value="author_id">arXiv author ID</option>
                <option value="help">Help pages</option>
                <option value="full_text">Full text</option>
              </select>
              <input id="query-mirror" name="query" type="text" value="Brettin, T">
              <ul id="abstracts">
                <li><input checked id="abstracts-0-mirror" name="abstracts" type="radio" value="show"> <label for="abstracts-0-mirror">Show abstracts</label></li>
                <li><input id="abstracts-1-mirror" name="abstracts" type="radio" value="hide"> <label for="abstracts-1-mirror">Hide abstracts</label></li>
              </ul>
            </div>

            <div class="box field is-grouped is-grouped-multiline level-item">
              <div class="control">
                <span class="select is-small">
                  <select id="size" name="size">
                    <option value="25">25</option>
                    <option selected value="50">50</option>
                    <option value="100">100</option>
                    <option value="200">200</option>
                  </select>
                </span>
                <label for="size">results per page</label>.
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.00709">arXiv:2410.00709</a> <span> [<a href="https://arxiv.org/pdf/2410.00709">pdf</a>, <a href="https://arxiv.org/format/2410.00709">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Binding Affinity Prediction: From Conventional to Machine Learning-Based Approaches </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Liu%2C+X">Xuefeng Liu</a>, <a href="/search/q-bio?searchtype=author&query=Jiang%2C+S">Songhao Jiang</a>, <a href="/search/q-bio?searchtype=author&query=Duan%2C+X">Xiaotian Duan</a>, <a href="/search/q-bio?searchtype=author&query=Vasan%2C+A">Archit Vasan</a>, <a href="/search/q-bio?searchtype=author&query=Liu%2C+C">Chong Liu</a>, <a href="/search/q-bio?searchtype=author&query=Tien%2C+C">Chih-chan Tien</a>, <a 
href="/search/q-bio?searchtype=author&query=Ma%2C+H">Heng Ma</a>, <a href="/search/q-bio?searchtype=author&query=Brettin%2C+T">Thomas Brettin</a>, <a href="/search/q-bio?searchtype=author&query=Xia%2C+F">Fangfang Xia</a>, <a href="/search/q-bio?searchtype=author&query=Foster%2C+I+T">Ian T. Foster</a>, <a href="/search/q-bio?searchtype=author&query=Stevens%2C+R+L">Rick L. Stevens</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.00709v1-abstract-short" style="display: inline;"> Protein-ligand binding is the process by which a small molecule (drug or inhibitor) attaches to a target protein. The binding affinity, which refers to the strength of this interaction, is central to many important problems in bioinformatics such as drug design. An extensive amount of work has been devoted to predicting binding affinity over the past decades due to its significance. In this paper,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.00709v1-abstract-full').style.display = 'inline'; document.getElementById('2410.00709v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.00709v1-abstract-full" style="display: none;"> Protein-ligand binding is the process by which a small molecule (drug or inhibitor) attaches to a target protein. The binding affinity, which refers to the strength of this interaction, is central to many important problems in bioinformatics such as drug design. An extensive amount of work has been devoted to predicting binding affinity over the past decades due to its significance. In this paper, we review all significant recent works, focusing on the methods, features, and benchmark datasets. 
We have observed a rising trend in the use of traditional machine learning and deep learning models for predicting binding affinity, accompanied by an increasing amount of data on proteins and small drug-like molecules. While prediction results are constantly improving, we also identify several open questions and potential directions that remain unexplored in the field. This paper could serve as an excellent starting point for machine learning researchers who wish to engage in the study of binding affinity, or for anyone with general interests in machine learning, drug discovery, and bioinformatics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.00709v1-abstract-full').style.display = 'none'; document.getElementById('2410.00709v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.12215">arXiv:2409.12215</a> <span> [<a href="https://arxiv.org/pdf/2409.12215">pdf</a>, <a href="https://arxiv.org/format/2409.12215">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Assessing Reusability of Deep Learning-Based Monotherapy Drug Response Prediction Models Trained with Omics Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Overbeek%2C+J+C">Jamie C. 
Overbeek</a>, <a href="/search/q-bio?searchtype=author&query=Partin%2C+A">Alexander Partin</a>, <a href="/search/q-bio?searchtype=author&query=Brettin%2C+T+S">Thomas S. Brettin</a>, <a href="/search/q-bio?searchtype=author&query=Chia%2C+N">Nicholas Chia</a>, <a href="/search/q-bio?searchtype=author&query=Narykov%2C+O">Oleksandr Narykov</a>, <a href="/search/q-bio?searchtype=author&query=Vasanthakumari%2C+P">Priyanka Vasanthakumari</a>, <a href="/search/q-bio?searchtype=author&query=Wilke%2C+A">Andreas Wilke</a>, <a href="/search/q-bio?searchtype=author&query=Zhu%2C+Y">Yitan Zhu</a>, <a href="/search/q-bio?searchtype=author&query=Clyde%2C+A">Austin Clyde</a>, <a href="/search/q-bio?searchtype=author&query=Jones%2C+S">Sara Jones</a>, <a href="/search/q-bio?searchtype=author&query=Gnanaolivu%2C+R">Rohan Gnanaolivu</a>, <a href="/search/q-bio?searchtype=author&query=Liu%2C+Y">Yuanhang Liu</a>, <a href="/search/q-bio?searchtype=author&query=Jiang%2C+J">Jun Jiang</a>, <a href="/search/q-bio?searchtype=author&query=Wang%2C+C">Chen Wang</a>, <a href="/search/q-bio?searchtype=author&query=Knutson%2C+C">Carter Knutson</a>, <a href="/search/q-bio?searchtype=author&query=McNaughton%2C+A">Andrew McNaughton</a>, <a href="/search/q-bio?searchtype=author&query=Kumar%2C+N">Neeraj Kumar</a>, <a href="/search/q-bio?searchtype=author&query=Fernando%2C+G+D">Gayara Demini Fernando</a>, <a href="/search/q-bio?searchtype=author&query=Ghosh%2C+S">Souparno Ghosh</a>, <a href="/search/q-bio?searchtype=author&query=Sanchez-Villalobos%2C+C">Cesar Sanchez-Villalobos</a>, <a href="/search/q-bio?searchtype=author&query=Zhang%2C+R">Ruibo Zhang</a>, <a href="/search/q-bio?searchtype=author&query=Pal%2C+R">Ranadip Pal</a>, <a href="/search/q-bio?searchtype=author&query=Weil%2C+M+R">M. Ryan Weil</a>, <a href="/search/q-bio?searchtype=author&query=Stevens%2C+R+L">Rick L. 
Stevens</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.12215v1-abstract-short" style="display: inline;"> Cancer drug response prediction (DRP) models present a promising approach towards precision oncology, tailoring treatments to individual patient profiles. While deep learning (DL) methods have shown great potential in this area, models that can be successfully translated into clinical practice and shed light on the molecular mechanisms underlying treatment response will likely emerge from collabor… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.12215v1-abstract-full').style.display = 'inline'; document.getElementById('2409.12215v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.12215v1-abstract-full" style="display: none;"> Cancer drug response prediction (DRP) models present a promising approach towards precision oncology, tailoring treatments to individual patient profiles. While deep learning (DL) methods have shown great potential in this area, models that can be successfully translated into clinical practice and shed light on the molecular mechanisms underlying treatment response will likely emerge from collaborative research efforts. This highlights the need for reusable and adaptable models that can be improved and tested by the wider scientific community. In this study, we present a scoring system for assessing the reusability of prediction DRP models, and apply it to 17 peer-reviewed DL-based DRP models. 
As part of the IMPROVE (Innovative Methodologies and New Data for Predictive Oncology Model Evaluation) project, which aims to develop methods for systematic evaluation and comparison DL models across scientific domains, we analyzed these 17 DRP models focusing on three key categories: software environment, code modularity, and data availability and preprocessing. While not the primary focus, we also attempted to reproduce key performance metrics to verify model behavior and adaptability. Our assessment of 17 DRP models reveals both strengths and shortcomings in model reusability. To promote rigorous practices and open-source sharing, we offer recommendations for developing and sharing prediction models. Following these recommendations can address many of the issues identified in this study, improving model reusability without adding significant burdens on researchers. This work offers the first comprehensive assessment of reusability and reproducibility across diverse DRP models, providing insights into current model sharing practices and promoting standards within the DRP and broader AI-enabled scientific research community. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.12215v1-abstract-full').style.display = 'none'; document.getElementById('2409.12215v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 2 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.11329">arXiv:2310.11329</a> <span> [<a href="https://arxiv.org/pdf/2310.11329">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Influencing factors on false positive rates when classifying tumor cell line response to drug treatment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Vasanthakumari%2C+P">Priyanka Vasanthakumari</a>, <a href="/search/q-bio?searchtype=author&query=Brettin%2C+T">Thomas Brettin</a>, <a href="/search/q-bio?searchtype=author&query=Zhu%2C+Y">Yitan Zhu</a>, <a href="/search/q-bio?searchtype=author&query=Yoo%2C+H">Hyunseung Yoo</a>, <a href="/search/q-bio?searchtype=author&query=Shukla%2C+M">Maulik Shukla</a>, <a href="/search/q-bio?searchtype=author&query=Partin%2C+A">Alexander Partin</a>, <a href="/search/q-bio?searchtype=author&query=Xia%2C+F">Fangfang Xia</a>, <a href="/search/q-bio?searchtype=author&query=Narykov%2C+O">Oleksandr Narykov</a>, <a href="/search/q-bio?searchtype=author&query=Stevens%2C+R+L">Rick L. Stevens</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.11329v1-abstract-short" style="display: inline;"> Informed selection of drug candidates for laboratory experimentation provides an efficient means of identifying suitable anti-cancer treatments. 
The advancement of artificial intelligence has led to the development of computational models to predict cancer cell line response to drug treatment. It is important to analyze the false positive rate (FPR) of the models, to increase the number of effecti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.11329v1-abstract-full').style.display = 'inline'; document.getElementById('2310.11329v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.11329v1-abstract-full" style="display: none;"> Informed selection of drug candidates for laboratory experimentation provides an efficient means of identifying suitable anti-cancer treatments. The advancement of artificial intelligence has led to the development of computational models to predict cancer cell line response to drug treatment. It is important to analyze the false positive rate (FPR) of the models, to increase the number of effective treatments identified and to minimize unnecessary laboratory experimentation. Such analysis will also aid in identifying drugs or cancer types that require more data collection to improve model predictions. This work uses an attention based neural network classification model to identify responsive/non-responsive drug treatments across multiple types of cancer cell lines. Two data filtering techniques have been applied to generate 10 data subsets, including removing samples for which dose response curves are poorly fitted and removing samples whose area under the dose response curve (AUC) values are marginal around 0.5 from the training set. One hundred trials of 10-fold cross-validation analysis is performed to test the model prediction performance on all the data subsets and the subset with the best model prediction performance is selected for further analysis. 
Several error analysis metrics such as the false positive rate (FPR), and the prediction uncertainty are evaluated, and the results are summarized by cancer type and drug mechanism of action (MoA) category. The FPR of cancer type spans between 0.262 and 0.5189, while that of drug MoA category spans almost the full range of [0, 1]. This study identifies cancer types and drug MoAs with high FPRs. Additional drug screening data of these cancer and drug categories may improve response modeling. Our results also demonstrate that the two data filtering approaches help improve the drug response prediction performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.11329v1-abstract-full').style.display = 'none'; document.getElementById('2310.11329v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.10442">arXiv:2211.10442</a> <span> [<a href="https://arxiv.org/pdf/2211.10442">pdf</a>, <a href="https://arxiv.org/format/2211.10442">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Deep learning methods for drug response prediction in cancer: predominant and emerging trends </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Partin%2C+A">Alexander Partin</a>, <a href="/search/q-bio?searchtype=author&query=Brettin%2C+T+S">Thomas S. 
Brettin</a>, <a href="/search/q-bio?searchtype=author&query=Zhu%2C+Y">Yitan Zhu</a>, <a href="/search/q-bio?searchtype=author&query=Narykov%2C+O">Oleksandr Narykov</a>, <a href="/search/q-bio?searchtype=author&query=Clyde%2C+A">Austin Clyde</a>, <a href="/search/q-bio?searchtype=author&query=Overbeek%2C+J">Jamie Overbeek</a>, <a href="/search/q-bio?searchtype=author&query=Stevens%2C+R+L">Rick L. Stevens</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.10442v1-abstract-short" style="display: inline;"> Cancer claims millions of lives yearly worldwide. While many therapies have been made available in recent years, by in large cancer remains unsolved. Exploiting computational predictive models to study and treat cancer holds great promise in improving drug development and personalized design of treatment plans, ultimately suppressing tumors, alleviating suffering, and prolonging lives of patients.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.10442v1-abstract-full').style.display = 'inline'; document.getElementById('2211.10442v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.10442v1-abstract-full" style="display: none;"> Cancer claims millions of lives yearly worldwide. While many therapies have been made available in recent years, by in large cancer remains unsolved. Exploiting computational predictive models to study and treat cancer holds great promise in improving drug development and personalized design of treatment plans, ultimately suppressing tumors, alleviating suffering, and prolonging lives of patients. A wave of recent papers demonstrates promising results in predicting cancer response to drug treatments while utilizing deep learning methods. 
These papers investigate diverse data representations, neural network architectures, learning methodologies, and evaluations schemes. However, deciphering promising predominant and emerging trends is difficult due to the variety of explored methods and lack of standardized framework for comparing drug response prediction models. To obtain a comprehensive landscape of deep learning methods, we conducted an extensive search and analysis of deep learning models that predict the response to single drug treatments. A total of 60 deep learning-based models have been curated and summary plots were generated. Based on the analysis, observable patterns and prevalence of methods have been revealed. This review allows to better understand the current state of the field and identify major challenges and promising solution paths. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.10442v1-abstract-full').style.display = 'none'; document.getElementById('2211.10442v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.11678">arXiv:2204.11678</a> <span> [<a href="https://arxiv.org/pdf/2204.11678">pdf</a>, <a href="https://arxiv.org/format/2204.11678">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Data augmentation and multimodal learning for predicting drug response in patient-derived xenografts from gene expressions and histology images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Partin%2C+A">Alexander Partin</a>, <a href="/search/q-bio?searchtype=author&query=Brettin%2C+T">Thomas Brettin</a>, <a href="/search/q-bio?searchtype=author&query=Zhu%2C+Y">Yitan Zhu</a>, <a href="/search/q-bio?searchtype=author&query=Dolezal%2C+J+M">James M. Dolezal</a>, <a href="/search/q-bio?searchtype=author&query=Kochanny%2C+S">Sara Kochanny</a>, <a href="/search/q-bio?searchtype=author&query=Pearson%2C+A+T">Alexander T. Pearson</a>, <a href="/search/q-bio?searchtype=author&query=Shukla%2C+M">Maulik Shukla</a>, <a href="/search/q-bio?searchtype=author&query=Evrard%2C+Y+A">Yvonne A. Evrard</a>, <a href="/search/q-bio?searchtype=author&query=Doroshow%2C+J+H">James H. Doroshow</a>, <a href="/search/q-bio?searchtype=author&query=Stevens%2C+R+L">Rick L. Stevens</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.11678v1-abstract-short" style="display: inline;"> Patient-derived xenografts (PDXs) are an appealing platform for preclinical drug studies because the in vivo environment of PDXs helps preserve tumor heterogeneity and usually better mimics drug response of patients with cancer compared to CCLs. 
We investigate multimodal neural network (MM-Net) and data augmentation for drug response prediction in PDXs. The MM-Net learns to predict response using… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.11678v1-abstract-full').style.display = 'inline'; document.getElementById('2204.11678v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.11678v1-abstract-full" style="display: none;"> Patient-derived xenografts (PDXs) are an appealing platform for preclinical drug studies because the in vivo environment of PDXs helps preserve tumor heterogeneity and usually better mimics drug response of patients with cancer compared to CCLs. We investigate multimodal neural network (MM-Net) and data augmentation for drug response prediction in PDXs. The MM-Net learns to predict response using drug descriptors, gene expressions (GE), and histology whole-slide images (WSIs) where the multi-modality refers to the tumor features. We explore whether the integration of WSIs with GE improves predictions as compared with models that use GE alone. We use two methods to address the limited number of response values: 1) homogenize drug representations which allows to combine single-drug and drug-pairs treatments into a single dataset, 2) augment drug-pair samples by switching the order of drug features which doubles the sample size of all drug-pair samples. These methods enable us to combine single-drug and drug-pair treatments, allowing us to train multimodal and unimodal neural networks (NNs) without changing architectures or the dataset. Prediction performance of three unimodal NNs which use GE are compared to assess the contribution of data augmentation methods. 
NN that uses the full dataset which includes the original and the augmented drug-pair treatments as well as single-drug treatments significantly outperforms NNs that ignore either the augmented drug-pairs or the single-drug treatments. In assessing the contribution of multimodal learning based on the MCC metric, MM-Net statistically significantly outperforms all the baselines. Our results show that data augmentation and integration of histology images with GE can improve prediction performance of drug response in PDXs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.11678v1-abstract-full').style.display = 'none'; document.getElementById('2204.11678v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.07036">arXiv:2106.07036</a> <span> [<a href="https://arxiv.org/pdf/2106.07036">pdf</a>, <a href="https://arxiv.org/format/2106.07036">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Protein-Ligand Docking Surrogate Models: A SARS-CoV-2 Benchmark for Deep Learning Accelerated Virtual Screening </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Clyde%2C+A">Austin Clyde</a>, <a href="/search/q-bio?searchtype=author&query=Brettin%2C+T">Thomas Brettin</a>, <a href="/search/q-bio?searchtype=author&query=Partin%2C+A">Alexander Partin</a>, <a 
href="/search/q-bio?searchtype=author&query=Yoo%2C+H">Hyunseung Yoo</a>, <a href="/search/q-bio?searchtype=author&query=Babuji%2C+Y">Yadu Babuji</a>, <a href="/search/q-bio?searchtype=author&query=Blaiszik%2C+B">Ben Blaiszik</a>, <a href="/search/q-bio?searchtype=author&query=Merzky%2C+A">Andre Merzky</a>, <a href="/search/q-bio?searchtype=author&query=Turilli%2C+M">Matteo Turilli</a>, <a href="/search/q-bio?searchtype=author&query=Jha%2C+S">Shantenu Jha</a>, <a href="/search/q-bio?searchtype=author&query=Ramanathan%2C+A">Arvind Ramanathan</a>, <a href="/search/q-bio?searchtype=author&query=Stevens%2C+R">Rick Stevens</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.07036v2-abstract-short" style="display: inline;"> We propose a benchmark to study surrogate model accuracy for protein-ligand docking. We share a dataset consisting of 200 million 3D complex structures and 2D structure scores across a consistent set of 13 million "in-stock" molecules over 15 receptors, or binding sites, across the SARS-CoV-2 proteome. Our work shows surrogate docking models have six orders of magnitude more throughput than standa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.07036v2-abstract-full').style.display = 'inline'; document.getElementById('2106.07036v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.07036v2-abstract-full" style="display: none;"> We propose a benchmark to study surrogate model accuracy for protein-ligand docking. We share a dataset consisting of 200 million 3D complex structures and 2D structure scores across a consistent set of 13 million "in-stock" molecules over 15 receptors, or binding sites, across the SARS-CoV-2 proteome. 
Our work shows surrogate docking models have six orders of magnitude more throughput than standard docking protocols on the same supercomputer node types. We demonstrate the power of high-speed surrogate models by running each target against 1 billion molecules in under a day (50k predictions per GPU seconds). We showcase a workflow for docking utilizing surrogate ML models as a pre-filter. Our workflow is ten times faster at screening a library of compounds than the standard technique, with an error rate less than 0.01\% of detecting the underlying best scoring 0.1\% of compounds. Our analysis of the speedup explains that to screen more molecules under a docking paradigm, another order of magnitude speedup must come from model accuracy rather than computing speed (which, if increased, will not anymore alter our throughput to screen molecules). We believe this is strong evidence for the community to begin focusing on improving the accuracy of surrogate models to improve the ability to screen massive compound libraries 100x or even 1000x faster than current techniques. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.07036v2-abstract-full').style.display = 'none'; document.getElementById('2106.07036v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.02190">arXiv:2106.02190</a> <span> [<a href="https://arxiv.org/pdf/2106.02190">pdf</a>, <a href="https://arxiv.org/format/2106.02190">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Spatial Graph Attention and Curiosity-driven Policy for Antiviral Drug Discovery </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Wu%2C+Y">Yulun Wu</a>, <a href="/search/q-bio?searchtype=author&query=Cashman%2C+M">Mikaela Cashman</a>, <a href="/search/q-bio?searchtype=author&query=Choma%2C+N">Nicholas Choma</a>, <a href="/search/q-bio?searchtype=author&query=Prates%2C+%C3%89+T">Érica T. Prates</a>, <a href="/search/q-bio?searchtype=author&query=Vergara%2C+V+G+M">Verónica G. Melesse Vergara</a>, <a href="/search/q-bio?searchtype=author&query=Shah%2C+M">Manesh Shah</a>, <a href="/search/q-bio?searchtype=author&query=Chen%2C+A">Andrew Chen</a>, <a href="/search/q-bio?searchtype=author&query=Clyde%2C+A">Austin Clyde</a>, <a href="/search/q-bio?searchtype=author&query=Brettin%2C+T+S">Thomas S. Brettin</a>, <a href="/search/q-bio?searchtype=author&query=de+Jong%2C+W+A">Wibe A. de Jong</a>, <a href="/search/q-bio?searchtype=author&query=Kumar%2C+N">Neeraj Kumar</a>, <a href="/search/q-bio?searchtype=author&query=Head%2C+M+S">Martha S. Head</a>, <a href="/search/q-bio?searchtype=author&query=Stevens%2C+R+L">Rick L.
Stevens</a>, <a href="/search/q-bio?searchtype=author&query=Nugent%2C+P">Peter Nugent</a>, <a href="/search/q-bio?searchtype=author&query=Jacobson%2C+D+A">Daniel A. Jacobson</a>, <a href="/search/q-bio?searchtype=author&query=Brown%2C+J+B">James B. Brown</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.02190v6-abstract-short" style="display: inline;"> We developed Distilled Graph Attention Policy Network (DGAPN), a reinforcement learning model to generate novel graph-structured chemical representations that optimize user-defined objectives by efficiently navigating a physically constrained domain. The framework is examined on the task of generating molecules that are designed to bind, noncovalently, to functional sites of SARS-CoV-2 proteins. W… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.02190v6-abstract-full').style.display = 'inline'; document.getElementById('2106.02190v6-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.02190v6-abstract-full" style="display: none;"> We developed Distilled Graph Attention Policy Network (DGAPN), a reinforcement learning model to generate novel graph-structured chemical representations that optimize user-defined objectives by efficiently navigating a physically constrained domain. The framework is examined on the task of generating molecules that are designed to bind, noncovalently, to functional sites of SARS-CoV-2 proteins. We present a spatial Graph Attention (sGAT) mechanism that leverages self-attention over both node and edge attributes as well as encoding the spatial structure -- this capability is of considerable interest in synthetic biology and drug discovery. 
An attentional policy network is introduced to learn the decision rules for a dynamic, fragment-based chemical environment, and state-of-the-art policy gradient techniques are employed to train the network with stability. Exploration is driven by the stochasticity of the action space design and the innovation reward bonuses learned and proposed by random network distillation. In experiments, our framework achieved outstanding results compared to state-of-the-art algorithms, while reducing the complexity of paths to chemical synthesis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.02190v6-abstract-full').style.display = 'none'; document.getElementById('2106.02190v6-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2104.08961">arXiv:2104.08961</a> <span> [<a href="https://arxiv.org/pdf/2104.08961">pdf</a>, <a href="https://arxiv.org/format/2104.08961">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> A cross-study analysis of drug response prediction in cancer cell lines </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Xia%2C+F">Fangfang Xia</a>, <a href="/search/q-bio?searchtype=author&query=Allen%2C+J">Jonathan Allen</a>, <a href="/search/q-bio?searchtype=author&query=Balaprakash%2C+P">Prasanna Balaprakash</a>, <a href="/search/q-bio?searchtype=author&query=Brettin%2C+T">Thomas Brettin</a>, <a href="/search/q-bio?searchtype=author&query=Garcia-Cardona%2C+C">Cristina Garcia-Cardona</a>, <a href="/search/q-bio?searchtype=author&query=Clyde%2C+A">Austin Clyde</a>, <a href="/search/q-bio?searchtype=author&query=Cohn%2C+J">Judith Cohn</a>, <a href="/search/q-bio?searchtype=author&query=Doroshow%2C+J">James Doroshow</a>, <a href="/search/q-bio?searchtype=author&query=Duan%2C+X">Xiaotian Duan</a>, <a href="/search/q-bio?searchtype=author&query=Dubinkina%2C+V">Veronika Dubinkina</a>, <a href="/search/q-bio?searchtype=author&query=Evrard%2C+Y">Yvonne Evrard</a>, <a href="/search/q-bio?searchtype=author&query=Fan%2C+Y+J">Ya Ju Fan</a>, <a href="/search/q-bio?searchtype=author&query=Gans%2C+J">Jason Gans</a>, <a href="/search/q-bio?searchtype=author&query=He%2C+S">Stewart He</a>, <a href="/search/q-bio?searchtype=author&query=Lu%2C+P">Pinyi Lu</a>, <a href="/search/q-bio?searchtype=author&query=Maslov%2C+S">Sergei Maslov</a>, <a href="/search/q-bio?searchtype=author&query=Partin%2C+A">Alexander Partin</a>, <a 
href="/search/q-bio?searchtype=author&query=Shukla%2C+M">Maulik Shukla</a>, <a href="/search/q-bio?searchtype=author&query=Stahlberg%2C+E">Eric Stahlberg</a>, <a href="/search/q-bio?searchtype=author&query=Wozniak%2C+J+M">Justin M. Wozniak</a>, <a href="/search/q-bio?searchtype=author&query=Yoo%2C+H">Hyunseung Yoo</a>, <a href="/search/q-bio?searchtype=author&query=Zaki%2C+G">George Zaki</a>, <a href="/search/q-bio?searchtype=author&query=Zhu%2C+Y">Yitan Zhu</a>, <a href="/search/q-bio?searchtype=author&query=Stevens%2C+R">Rick Stevens</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2104.08961v2-abstract-short" style="display: inline;"> To enable personalized cancer treatment, machine learning models have been developed to predict drug response as a function of tumor and drug features. However, most algorithm development efforts have relied on cross validation within a single study to assess model accuracy. While an essential first step, cross validation within a biological data set typically provides an overly optimistic estimat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.08961v2-abstract-full').style.display = 'inline'; document.getElementById('2104.08961v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2104.08961v2-abstract-full" style="display: none;"> To enable personalized cancer treatment, machine learning models have been developed to predict drug response as a function of tumor and drug features. However, most algorithm development efforts have relied on cross validation within a single study to assess model accuracy. While an essential first step, cross validation within a biological data set typically provides an overly optimistic estimate of the prediction performance on independent test sets. 
To provide a more rigorous assessment of model generalizability between different studies, we use machine learning to analyze five publicly available cell line-based data sets: NCI60, CTRP, GDSC, CCLE and gCSI. Based on observed experimental variability across studies, we explore estimates of prediction upper bounds. We report performance results of a variety of machine learning models, with a multitasking deep neural network achieving the best cross-study generalizability. By multiple measures, models trained on CTRP yield the most accurate predictions on the remaining testing data, and gCSI is the most predictable among the cell line data sets included in this study. With these experiments and further simulations on partial data, two lessons emerge: (1) differences in viability assays can limit model generalizability across studies, and (2) drug diversity, more than tumor diversity, is crucial for raising model generalizability in preclinical screening. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.08961v2-abstract-full').style.display = 'none'; document.getElementById('2104.08961v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 August, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2021. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by Briefings in Bioinformatics</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.02843">arXiv:2103.02843</a> <span> [<a href="https://arxiv.org/pdf/2103.02843">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biological Physics">physics.bio-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1098/rsfs.2021.0018">10.1098/rsfs.2021.0018 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Pandemic Drugs at Pandemic Speed: Infrastructure for Accelerating COVID-19 Drug Discovery with Hybrid Machine Learning- and Physics-based Simulations on High Performance Computers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Bhati%2C+A+P">Agastya P. Bhati</a>, <a href="/search/q-bio?searchtype=author&query=Wan%2C+S">Shunzhou Wan</a>, <a href="/search/q-bio?searchtype=author&query=Alf%C3%A8%2C+D">Dario Alfè</a>, <a href="/search/q-bio?searchtype=author&query=Clyde%2C+A+R">Austin R.
Clyde</a>, <a href="/search/q-bio?searchtype=author&query=Bode%2C+M">Mathis Bode</a>, <a href="/search/q-bio?searchtype=author&query=Tan%2C+L">Li Tan</a>, <a href="/search/q-bio?searchtype=author&query=Titov%2C+M">Mikhail Titov</a>, <a href="/search/q-bio?searchtype=author&query=Merzky%2C+A">Andre Merzky</a>, <a href="/search/q-bio?searchtype=author&query=Turilli%2C+M">Matteo Turilli</a>, <a href="/search/q-bio?searchtype=author&query=Jha%2C+S">Shantenu Jha</a>, <a href="/search/q-bio?searchtype=author&query=Highfield%2C+R+R">Roger R. Highfield</a>, <a href="/search/q-bio?searchtype=author&query=Rocchia%2C+W">Walter Rocchia</a>, <a href="/search/q-bio?searchtype=author&query=Scafuri%2C+N">Nicola Scafuri</a>, <a href="/search/q-bio?searchtype=author&query=Succi%2C+S">Sauro Succi</a>, <a href="/search/q-bio?searchtype=author&query=Kranzlm%C3%BCller%2C+D">Dieter Kranzlmüller</a>, <a href="/search/q-bio?searchtype=author&query=Mathias%2C+G">Gerald Mathias</a>, <a href="/search/q-bio?searchtype=author&query=Wifling%2C+D">David Wifling</a>, <a href="/search/q-bio?searchtype=author&query=Donon%2C+Y">Yann Donon</a>, <a href="/search/q-bio?searchtype=author&query=Di+Meglio%2C+A">Alberto Di Meglio</a>, <a href="/search/q-bio?searchtype=author&query=Vallecorsa%2C+S">Sofia Vallecorsa</a>, <a href="/search/q-bio?searchtype=author&query=Ma%2C+H">Heng Ma</a>, <a href="/search/q-bio?searchtype=author&query=Trifan%2C+A">Anda Trifan</a>, <a href="/search/q-bio?searchtype=author&query=Ramanathan%2C+A">Arvind Ramanathan</a>, <a href="/search/q-bio?searchtype=author&query=Brettin%2C+T">Tom Brettin</a>, <a href="/search/q-bio?searchtype=author&query=Partin%2C+A">Alexander Partin</a> , et al.
(4 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.02843v2-abstract-short" style="display: inline;"> The race to meet the challenges of the global pandemic has served as a reminder that the existing drug discovery process is expensive, inefficient and slow. There is a major bottleneck screening the vast number of potential small molecules to shortlist lead compounds for antiviral drug development. New opportunities to accelerate drug discovery lie at the interface between machine learning methods… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.02843v2-abstract-full').style.display = 'inline'; document.getElementById('2103.02843v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.02843v2-abstract-full" style="display: none;"> The race to meet the challenges of the global pandemic has served as a reminder that the existing drug discovery process is expensive, inefficient and slow. There is a major bottleneck screening the vast number of potential small molecules to shortlist lead compounds for antiviral drug development. New opportunities to accelerate drug discovery lie at the interface between machine learning methods, in this case developed for linear accelerators, and physics-based methods. The two in silico methods, each have their own advantages and limitations which, interestingly, complement each other. Here, we present an innovative infrastructural development that combines both approaches to accelerate drug discovery. The scale of the potential resulting workflow is such that it is dependent on supercomputing to achieve extremely high throughput. 
We have demonstrated the viability of this workflow for the study of inhibitors for four COVID-19 target proteins and our ability to perform the required large-scale calculations to identify lead antiviral compounds through repurposing on a variety of supercomputers. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.02843v2-abstract-full').style.display = 'none'; document.getElementById('2103.02843v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Interface Focus. 2021. 11 (6): 20210018 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2011.12466">arXiv:2011.12466</a> <span> [<a href="https://arxiv.org/pdf/2011.12466">pdf</a>, <a href="https://arxiv.org/format/2011.12466">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Learning Curves for Drug Response Prediction in Cancer Cell Lines </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Partin%2C+A">Alexander Partin</a>, <a href="/search/q-bio?searchtype=author&query=Brettin%2C+T">Thomas Brettin</a>, <a href="/search/q-bio?searchtype=author&query=Evrard%2C+Y+A">Yvonne A. 
Evrard</a>, <a href="/search/q-bio?searchtype=author&query=Zhu%2C+Y">Yitan Zhu</a>, <a href="/search/q-bio?searchtype=author&query=Yoo%2C+H">Hyunseung Yoo</a>, <a href="/search/q-bio?searchtype=author&query=Xia%2C+F">Fangfang Xia</a>, <a href="/search/q-bio?searchtype=author&query=Jiang%2C+S">Songhao Jiang</a>, <a href="/search/q-bio?searchtype=author&query=Clyde%2C+A">Austin Clyde</a>, <a href="/search/q-bio?searchtype=author&query=Shukla%2C+M">Maulik Shukla</a>, <a href="/search/q-bio?searchtype=author&query=Fonstein%2C+M">Michael Fonstein</a>, <a href="/search/q-bio?searchtype=author&query=Doroshow%2C+J+H">James H. Doroshow</a>, <a href="/search/q-bio?searchtype=author&query=Stevens%2C+R">Rick Stevens</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2011.12466v1-abstract-short" style="display: inline;"> Motivated by the size of cell line drug sensitivity data, researchers have been developing machine learning (ML) models for predicting drug response to advance cancer treatment. As drug sensitivity studies continue generating data, a common question is whether the proposed predictors can further improve the generalization performance with more training data. We utilize empirical learning curves fo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2011.12466v1-abstract-full').style.display = 'inline'; document.getElementById('2011.12466v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2011.12466v1-abstract-full" style="display: none;"> Motivated by the size of cell line drug sensitivity data, researchers have been developing machine learning (ML) models for predicting drug response to advance cancer treatment. 
As drug sensitivity studies continue generating data, a common question is whether the proposed predictors can further improve the generalization performance with more training data. We utilize empirical learning curves for evaluating and comparing the data scaling properties of two neural networks (NNs) and two gradient boosting decision tree (GBDT) models trained on four drug screening datasets. The learning curves are accurately fitted to a power law model, providing a framework for assessing the data scaling behavior of these predictors. The curves demonstrate that no single model dominates in terms of prediction performance across all datasets and training sizes, suggesting that the shape of these curves depends on the unique model-dataset pair. The multi-input NN (mNN), in which gene expressions and molecular drug descriptors are input into separate subnetworks, outperforms a single-input NN (sNN), where the cell and drug features are concatenated for the input layer. In contrast, a GBDT with hyperparameter tuning exhibits superior performance as compared with both NNs at the lower range of training sizes for two of the datasets, whereas the mNN performs better at the higher range of training sizes. Moreover, the trajectory of the curves suggests that increasing the sample size is expected to further improve prediction scores of both NNs. These observations demonstrate the benefit of using learning curves to evaluate predictors, providing a broader perspective on the overall data scaling characteristics. The fitted power law curves provide a forward-looking performance metric and can serve as a co-design tool to guide experimental biologists and computational scientists in the design of future experiments. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2011.12466v1-abstract-full').style.display = 'none'; document.getElementById('2011.12466v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 November, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.06574">arXiv:2010.06574</a> <span> [<a href="https://arxiv.org/pdf/2010.06574">pdf</a>, <a href="https://arxiv.org/format/2010.06574">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> IMPECCABLE: Integrated Modeling PipelinE for COVID Cure by Assessing Better LEads </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Saadi%2C+A+A">Aymen Al Saadi</a>, <a href="/search/q-bio?searchtype=author&query=Alfe%2C+D">Dario Alfe</a>, <a href="/search/q-bio?searchtype=author&query=Babuji%2C+Y">Yadu Babuji</a>, <a href="/search/q-bio?searchtype=author&query=Bhati%2C+A">Agastya Bhati</a>, <a href="/search/q-bio?searchtype=author&query=Blaiszik%2C+B">Ben Blaiszik</a>, <a href="/search/q-bio?searchtype=author&query=Brettin%2C+T">Thomas 
Brettin</a>, <a href="/search/q-bio?searchtype=author&query=Chard%2C+K">Kyle Chard</a>, <a href="/search/q-bio?searchtype=author&query=Chard%2C+R">Ryan Chard</a>, <a href="/search/q-bio?searchtype=author&query=Coveney%2C+P">Peter Coveney</a>, <a href="/search/q-bio?searchtype=author&query=Trifan%2C+A">Anda Trifan</a>, <a href="/search/q-bio?searchtype=author&query=Brace%2C+A">Alex Brace</a>, <a href="/search/q-bio?searchtype=author&query=Clyde%2C+A">Austin Clyde</a>, <a href="/search/q-bio?searchtype=author&query=Foster%2C+I">Ian Foster</a>, <a href="/search/q-bio?searchtype=author&query=Gibbs%2C+T">Tom Gibbs</a>, <a href="/search/q-bio?searchtype=author&query=Jha%2C+S">Shantenu Jha</a>, <a href="/search/q-bio?searchtype=author&query=Keipert%2C+K">Kristopher Keipert</a>, <a href="/search/q-bio?searchtype=author&query=Kurth%2C+T">Thorsten Kurth</a>, <a href="/search/q-bio?searchtype=author&query=Kranzlm%C3%BCller%2C+D">Dieter Kranzlmüller</a>, <a href="/search/q-bio?searchtype=author&query=Lee%2C+H">Hyungro Lee</a>, <a href="/search/q-bio?searchtype=author&query=Li%2C+Z">Zhuozhao Li</a>, <a href="/search/q-bio?searchtype=author&query=Ma%2C+H">Heng Ma</a>, <a href="/search/q-bio?searchtype=author&query=Merzky%2C+A">Andre Merzky</a>, <a href="/search/q-bio?searchtype=author&query=Mathias%2C+G">Gerald Mathias</a>, <a href="/search/q-bio?searchtype=author&query=Partin%2C+A">Alexander Partin</a>, <a href="/search/q-bio?searchtype=author&query=Yin%2C+J">Junqi Yin</a> , et al. (11 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.06574v1-abstract-short" style="display: inline;"> The drug discovery process currently employed in the pharmaceutical industry typically requires about 10 years and $2-3 billion to deliver one new drug. This is both too expensive and too slow, especially in emergencies like the COVID-19 pandemic.
In silico methodologies need to be improved to better select lead compounds that can proceed to later stages of the drug discovery protocol accelerating… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.06574v1-abstract-full').style.display = 'inline'; document.getElementById('2010.06574v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.06574v1-abstract-full" style="display: none;"> The drug discovery process currently employed in the pharmaceutical industry typically requires about 10 years and $2-3 billion to deliver one new drug. This is both too expensive and too slow, especially in emergencies like the COVID-19 pandemic. In silico methodologies need to be improved to better select lead compounds that can proceed to later stages of the drug discovery protocol accelerating the entire process. No single methodological approach can achieve the necessary accuracy with required efficiency. Here we describe multiple algorithmic innovations to overcome this fundamental limitation, development and deployment of computational infrastructure at scale integrates multiple artificial intelligence and simulation-based approaches. Three measures of performance are:(i) throughput, the number of ligands per unit time; (ii) scientific performance, the number of effective ligands sampled per unit time and (iii) peak performance, in flop/s. The capabilities outlined here have been used in production for several months as the workhorse of the computational infrastructure to support the capabilities of the US-DOE National Virtual Biotechnology Laboratory in combination with resources from the EU Centre of Excellence in Computational Biomedicine.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.06574v1-abstract-full').style.display = 'none'; document.getElementById('2010.06574v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2006.02431">arXiv:2006.02431</a> <span> [<a href="https://arxiv.org/pdf/2006.02431">pdf</a>, <a href="https://arxiv.org/format/2006.02431">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Targeting SARS-CoV-2 with AI- and HPC-enabled Lead Generation: A First Data Release </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Babuji%2C+Y">Yadu Babuji</a>, <a href="/search/q-bio?searchtype=author&query=Blaiszik%2C+B">Ben Blaiszik</a>, <a href="/search/q-bio?searchtype=author&query=Brettin%2C+T">Tom Brettin</a>, <a href="/search/q-bio?searchtype=author&query=Chard%2C+K">Kyle Chard</a>, <a href="/search/q-bio?searchtype=author&query=Chard%2C+R">Ryan Chard</a>, <a href="/search/q-bio?searchtype=author&query=Clyde%2C+A">Austin Clyde</a>, <a href="/search/q-bio?searchtype=author&query=Foster%2C+I">Ian Foster</a>, <a 
href="/search/q-bio?searchtype=author&query=Hong%2C+Z">Zhi Hong</a>, <a href="/search/q-bio?searchtype=author&query=Jha%2C+S">Shantenu Jha</a>, <a href="/search/q-bio?searchtype=author&query=Li%2C+Z">Zhuozhao Li</a>, <a href="/search/q-bio?searchtype=author&query=Liu%2C+X">Xuefeng Liu</a>, <a href="/search/q-bio?searchtype=author&query=Ramanathan%2C+A">Arvind Ramanathan</a>, <a href="/search/q-bio?searchtype=author&query=Ren%2C+Y">Yi Ren</a>, <a href="/search/q-bio?searchtype=author&query=Saint%2C+N">Nicholaus Saint</a>, <a href="/search/q-bio?searchtype=author&query=Schwarting%2C+M">Marcus Schwarting</a>, <a href="/search/q-bio?searchtype=author&query=Stevens%2C+R">Rick Stevens</a>, <a href="/search/q-bio?searchtype=author&query=van+Dam%2C+H">Hubertus van Dam</a>, <a href="/search/q-bio?searchtype=author&query=Wagner%2C+R">Rick Wagner</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2006.02431v1-abstract-short" style="display: inline;"> Researchers across the globe are seeking to rapidly repurpose existing drugs or discover new drugs to counter the novel coronavirus disease (COVID-19) caused by severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2). One promising approach is to train machine learning (ML) and artificial intelligence (AI) tools to screen large numbers of small molecules. 
As a contribution to that effort,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.02431v1-abstract-full').style.display = 'inline'; document.getElementById('2006.02431v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2006.02431v1-abstract-full" style="display: none;"> Researchers across the globe are seeking to rapidly repurpose existing drugs or discover new drugs to counter the novel coronavirus disease (COVID-19) caused by severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2). One promising approach is to train machine learning (ML) and artificial intelligence (AI) tools to screen large numbers of small molecules. As a contribution to that effort, we are aggregating numerous small molecules from a variety of sources, using high-performance computing (HPC) to compute diverse properties of those molecules, using the computed properties to train ML/AI models, and then using the resulting models for screening. In this first data release, we make available 23 datasets collected from community sources representing over 4.2 B molecules enriched with pre-computed: 1) molecular fingerprints to aid similarity searches, 2) 2D images of molecules to enable exploration and application of image-based deep learning methods, and 3) 2D and 3D molecular descriptors to speed development of machine learning models. This data release encompasses structural information on the 4.2 B molecules and 60 TB of pre-computed data. Future releases will expand the data to include more detailed molecular simulations, computed models, and other products. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.02431v1-abstract-full').style.display = 'none'; document.getElementById('2006.02431v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 May, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2005.09572">arXiv:2005.09572</a> <span> [<a href="https://arxiv.org/pdf/2005.09572">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Ensemble Transfer Learning for the Prediction of Anti-Cancer Drug Response </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Zhu%2C+Y">Yitan Zhu</a>, <a href="/search/q-bio?searchtype=author&query=Brettin%2C+T">Thomas Brettin</a>, <a href="/search/q-bio?searchtype=author&query=Evrard%2C+Y+A">Yvonne A. Evrard</a>, <a href="/search/q-bio?searchtype=author&query=Partin%2C+A">Alexander Partin</a>, <a href="/search/q-bio?searchtype=author&query=Xia%2C+F">Fangfang Xia</a>, <a href="/search/q-bio?searchtype=author&query=Shukla%2C+M">Maulik Shukla</a>, <a href="/search/q-bio?searchtype=author&query=Yoo%2C+H">Hyunseung Yoo</a>, <a href="/search/q-bio?searchtype=author&query=Doroshow%2C+J+H">James H. 
Doroshow</a>, <a href="/search/q-bio?searchtype=author&query=Stevens%2C+R">Rick Stevens</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2005.09572v1-abstract-short" style="display: inline;"> Transfer learning has been shown to be effective in many applications in which training data for the target problem are limited but data for a related (source) problem are abundant. In this paper, we apply transfer learning to the prediction of anti-cancer drug response. Previous transfer learning studies for drug response prediction focused on building models that predict the response of tumor ce… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.09572v1-abstract-full').style.display = 'inline'; document.getElementById('2005.09572v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2005.09572v1-abstract-full" style="display: none;"> Transfer learning has been shown to be effective in many applications in which training data for the target problem are limited but data for a related (source) problem are abundant. In this paper, we apply transfer learning to the prediction of anti-cancer drug response. Previous transfer learning studies for drug response prediction focused on building models that predict the response of tumor cells to a specific drug treatment. We target the more challenging task of building general prediction models that can make predictions for both new tumor cells and new drugs. We apply the classic transfer learning framework that trains a prediction model on the source dataset and refines it on the target dataset, and extends the framework through ensemble. The ensemble transfer learning pipeline is implemented using LightGBM and two deep neural network (DNN) models with different architectures. 
Uniquely, we investigate its power for three application settings including drug repurposing, precision oncology, and new drug development, through different data partition schemes in cross-validation. We test the proposed ensemble transfer learning on benchmark in vitro drug screening datasets, taking one dataset as the source domain and another dataset as the target domain. The analysis results demonstrate the benefit of applying ensemble transfer learning for predicting anti-cancer drug response in all three applications with both LightGBM and DNN models. Compared between the different prediction models, a DNN model with two subnetworks for the inputs of tumor features and drug features separately outperforms LightGBM and the other DNN model that concatenates tumor features and drug features for input in the drug repurposing and precision oncology applications. In the more challenging application of new drug development, LightGBM performs better than the other two DNN models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.09572v1-abstract-full').style.display = 'none'; document.getElementById('2005.09572v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 May, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2020. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2005.00095">arXiv:2005.00095</a> <span> [<a href="https://arxiv.org/pdf/2005.00095">pdf</a>, <a href="https://arxiv.org/format/2005.00095">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> A Systematic Approach to Featurization for Cancer Drug Sensitivity Predictions with Deep Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Clyde%2C+A">Austin Clyde</a>, <a href="/search/q-bio?searchtype=author&query=Brettin%2C+T">Tom Brettin</a>, <a href="/search/q-bio?searchtype=author&query=Partin%2C+A">Alexander Partin</a>, <a href="/search/q-bio?searchtype=author&query=Shaulik%2C+M">Maulik Shaulik</a>, <a href="/search/q-bio?searchtype=author&query=Yoo%2C+H">Hyunseung Yoo</a>, <a href="/search/q-bio?searchtype=author&query=Evrard%2C+Y">Yvonne Evrard</a>, <a href="/search/q-bio?searchtype=author&query=Zhu%2C+Y">Yitan Zhu</a>, <a href="/search/q-bio?searchtype=author&query=Xia%2C+F">Fangfang Xia</a>, <a href="/search/q-bio?searchtype=author&query=Stevens%2C+R">Rick Stevens</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2005.00095v2-abstract-short" style="display: inline;"> By combining various cancer cell line (CCL) drug screening panels, the size of the data has grown significantly to begin understanding how advances in deep learning can advance drug response predictions. 
In this paper we train >35,000 neural network models, sweeping over common featurization techniques. We found the RNA-seq to be highly redundant and informative even with subsets larger than 128 f… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.00095v2-abstract-full').style.display = 'inline'; document.getElementById('2005.00095v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2005.00095v2-abstract-full" style="display: none;"> By combining various cancer cell line (CCL) drug screening panels, the size of the data has grown significantly to begin understanding how advances in deep learning can advance drug response predictions. In this paper we train >35,000 neural network models, sweeping over common featurization techniques. We found the RNA-seq to be highly redundant and informative even with subsets larger than 128 features. We found the inclusion of single nucleotide polymorphisms (SNPs) coded as count matrices improved model performance significantly, and no substantial difference in model performance with respect to molecular featurization between the common open source MOrdred descriptors and Dragon7 descriptors. Alongside this analysis, we outline data integration between CCL screening datasets and present evidence that new metrics and imbalanced data techniques, as well as advances in data standardization, need to be developed. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.00095v2-abstract-full').style.display = 'none'; document.getElementById('2005.00095v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 May, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 April, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2020. </p> </li> </ol> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv 
mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a 
class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository