<!-- CINXE.COM -->

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">

  <!-- Favicons, web manifest and platform tile/theme colours. -->
  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png">
  <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png">
  <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png">
  <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest">
  <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b">
  <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico">
  <meta name="msapplication-TileColor" content="#b31b1b">
  <!-- NOTE(review): unlike the other icon URLs this path is relative to the page URL; confirm it resolves. -->
  <meta name="msapplication-config" content="images/icons/browserconfig.xml">
  <meta name="theme-color" content="#b31b1b">

  <title>Search | arXiv e-print repository</title>
  <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script>
  <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css">

  <!--
    MathJax 2 configuration; it must appear before MathJax.js is loaded.
    ignoreClass '.*' combined with processClass 'mathjax.*' limits typesetting
    to elements carrying a class that starts with "mathjax".
    The second inlineMath pair is \( ... \): the previous value "\$" is just "$"
    in JavaScript and duplicated the first pair.
  -->
  <script type="text/x-mathjax-config">
    MathJax.Hub.Config({
      messageStyle: "none",
      extensions: ["tex2jax.js"],
      jax: ["input/TeX", "output/HTML-CSS"],
      tex2jax: {
        inlineMath: [ ['$','$'], ["\\(","\\)"] ],
        displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
        processEscapes: true,
        ignoreClass: '.*',
        processClass: 'mathjax.*'
      },
      TeX: {
        extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"],
        noErrors: {
          inlineDelimiters: ["$","$"],
          multiLine: false,
          style: { "font-size": "normal", "border": "" }
        }
      },
      "HTML-CSS": { availableFonts: ["TeX"] }
    });
  </script>
  <!-- Explicit https: instead of a protocol-relative URL, matching every other asset on the page. -->
  <script src="https://static.arxiv.org/MathJax-2.7.3/MathJax.js"></script>
  <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script>

  <!-- Search-application assets. -->
  <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css">
  <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css">
  <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script>
  <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script>
  <style>
    /* NOTE(review): "radio" is not an HTML element name, so this selector only
       matches a custom <radio> element; confirm the intended target. */
    radio#cf-customfield_11400 { display: none; }
  </style>
</head>
<body>
<header>
  <a href="#main-container" class="is-sr-only">Skip to main content</a>

  <!-- Attribution bar: Cornell logo and funding acknowledgement. -->
  <div class="attribution level is-marginless" role="banner">
    <div class="level-left">
      <!-- The alt text names the link; a generic aria-label would override it. -->
      <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200"></a>
    </div>
    <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div>
  </div>

  <!-- Identity bar: arXiv logo and the site-wide mini search form. -->
  <div class="identity level is-marginless">
    <div class="level-left">
      <div class="level-item">
        <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo">
          <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" alt="arxiv logo" width="85" style="width:85px;">
        </a>
      </div>
    </div>
    <div class="search-block level-right">
      <form class="level-item mini-search" method="GET" action="https://arxiv.org/search">
        <div class="field has-addons">
          <div class="control">
            <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms">
            <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p>
          </div>
          <div class="control">
            <div class="select is-small">
              <select name="searchtype" aria-label="Field to search">
                <option value="all" selected>All fields</option>
                <option value="title">Title</option>
                <option value="author">Author</option>
                <option value="abstract">Abstract</option>
                <option value="comments">Comments</option>
                <option value="journal_ref">Journal reference</option>
                <option value="acm_class">ACM classification</option>
                <option value="msc_class">MSC classification</option>
                <option value="report_num">Report number</option>
                <option value="paper_id">arXiv identifier</option>
                <option value="doi">DOI</option>
                <option value="orcid">ORCID</option>
                <option value="author_id">arXiv author ID</option>
                <option value="help">Help pages</option>
                <option value="full_text">Full text</option>
              </select>
            </div>
          </div>
          <!-- Sent along with the query so the request carries source=header. -->
          <input type="hidden" name="source" value="header">
          <button type="submit" class="button is-small is-cul-darker">Search</button>
        </div>
      </form>
    </div>
  </div>

  <div class="container">
    <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu">
      <a href="https://arxiv.org/login">Login</a>
    </div>
  </div>
</header>
<main class="container" id="main-container">
  <div class="level is-marginless">
    <div class="level-left">
      <h1 class="title is-clearfix"> Showing 1–50 of 104 results for author: <span class="mathjax">Wei, G</span> </h1>
    </div>
    <div class="level-right is-hidden-mobile">
      <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span>
    </div>
  </div>
  <div class="content">
    <!-- role="search" exposes the form as a search landmark ("aria-role" is not a valid attribute). -->
    <form method="GET" action="/search/q-bio" role="search">
      Searching in archive <strong>q-bio</strong>.
<a href="/search/?searchtype=author&query=Wei%2C+G">Search in all archives.</a>

<!-- Main query row: search term, field selector, submit button. -->
<div class="field has-addons-tablet">
  <div class="control is-expanded">
    <label for="query" class="hidden-label">Search term or terms</label>
    <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Wei, G">
  </div>
  <div class="select control is-medium">
    <label class="is-hidden" for="searchtype">Field</label>
    <select class="is-medium" id="searchtype" name="searchtype">
      <option value="all">All fields</option>
      <option value="title">Title</option>
      <option selected value="author">Author(s)</option>
      <option value="abstract">Abstract</option>
      <option value="comments">Comments</option>
      <option value="journal_ref">Journal reference</option>
      <option value="acm_class">ACM classification</option>
      <option value="msc_class">MSC classification</option>
      <option value="report_num">Report number</option>
      <option value="paper_id">arXiv identifier</option>
      <option value="doi">DOI</option>
      <option value="orcid">ORCID</option>
      <option value="license">License (URI)</option>
      <option value="author_id">arXiv author ID</option>
      <option value="help">Help pages</option>
      <option value="full_text">Full text</option>
    </select>
  </div>
  <div class="control">
    <button class="button is-link is-medium">Search</button>
  </div>
</div>

<!-- Abstract visibility toggle submitted with the query. -->
<div class="field">
  <div class="control is-size-7">
    <label class="radio">
      <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts
    </label>
    <label class="radio">
      <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts
    </label>
  </div>
</div>

<div class="is-clearfix" style="height: 2.5em">
  <div class="is-pulled-right">
    <a href="/search/advanced?terms-0-term=Wei%2C+G&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a>
  </div>
</div>

<!-- Current ordering and page size, resubmitted with a new query. -->
<input type="hidden" name="order" value="-announced_date_first">
<input type="hidden" name="size" value="50">
</form>

<div class="level breathe-horizontal">
<div class="level-left">
<!-- Page-size / sort-order form. -->
<form method="GET" action="/search/">
  <!--
    Non-displayed copies of the query, field and abstract settings, so that
    submitting this form sends the same search parameters.
    NOTE(review): the ids below (searchtype, query, abstracts-0, abstracts-1)
    repeat ids used in the search form above; ids should be unique per
    document. Left unchanged because external scripts may rely on them.
  -->
  <div style="display: none;">
    <select id="searchtype" name="searchtype">
      <option value="all">All fields</option>
      <option value="title">Title</option>
      <option selected value="author">Author(s)</option>
      <option value="abstract">Abstract</option>
      <option value="comments">Comments</option>
      <option value="journal_ref">Journal reference</option>
      <option value="acm_class">ACM classification</option>
      <option value="msc_class">MSC classification</option>
      <option value="report_num">Report number</option>
      <option value="paper_id">arXiv identifier</option>
      <option value="doi">DOI</option>
      <option value="orcid">ORCID</option>
      <option value="license">License (URI)</option>
      <option value="author_id">arXiv author ID</option>
      <option value="help">Help pages</option>
      <option value="full_text">Full text</option>
    </select>
    <input id="query" name="query" type="text" value="Wei, G">
    <ul id="abstracts">
      <li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li>
      <li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li>
    </ul>
  </div>

  <div class="box field is-grouped is-grouped-multiline level-item">
    <div class="control">
      <span class="select is-small">
        <select id="size" name="size">
          <option value="25">25</option>
          <option selected value="50">50</option>
          <option value="100">100</option>
          <option value="200">200</option>
        </select>
      </span>
      <label for="size">results per page</label>.
</div>
<div class="control">
  <label for="order">Sort results by</label>
  <span class="select is-small">
    <select id="order" name="order">
      <option selected value="-announced_date_first">Announcement date (newest first)</option>
      <option value="announced_date_first">Announcement date (oldest first)</option>
      <option value="-submitted_date">Submission date (newest first)</option>
      <option value="submitted_date">Submission date (oldest first)</option>
      <option value="">Relevance</option>
    </select>
  </span>
</div>
<div class="control">
  <button type="submit" class="button is-small is-link">Go</button>
</div>
</div>
</form>
</div>
</div>

<!--
  Result pagination. The page being viewed (start=0, class "is-current") is
  the one marked aria-current="page"; the other links are labelled as
  navigation targets. <nav> already has the navigation role, so no explicit
  role attribute is needed.
-->
<nav class="pagination is-small is-centered breathe-horizontal" aria-label="pagination">
  <!-- No previous page on the first page: kept in the layout but made invisible by its class. -->
  <a href="" class="pagination-previous is-invisible">Previous </a>
  <a href="/search/?searchtype=author&query=Wei%2C+G&start=50" class="pagination-next">Next </a>
  <ul class="pagination-list">
    <li>
      <a href="/search/?searchtype=author&query=Wei%2C+G&start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a>
    </li>
    <li>
      <a href="/search/?searchtype=author&query=Wei%2C+G&start=50" class="pagination-link" aria-label="Goto page 2">2 </a>
    </li>
    <li>
      <a href="/search/?searchtype=author&query=Wei%2C+G&start=100" class="pagination-link" aria-label="Goto page 3">3 </a>
    </li>
  </ul>
</nav>

<!-- Search results; numbering starts at 1 on this page. -->
<ol class="breathe-horizontal" start="1">
<li class="arxiv-result">
  <div class="is-marginless">
    <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.17331">arXiv:2411.17331</a> <span> [<a href="https://arxiv.org/pdf/2411.17331">pdf</a>, <a href="https://arxiv.org/format/2411.17331">other</a>] </span> </p>
    <div class="tags is-inline-block">
      <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Geometric Topology">math.GT</span>
      <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span>
    </div>
  </div>
  <p class="title is-5 mathjax"> Multiscale Jones Polynomial and
Persistent Jones Polynomial for Knot Data Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Song%2C+R">Ruzhi Song</a>, <a href="/search/q-bio?searchtype=author&query=Li%2C+F">Fengling Li</a>, <a href="/search/q-bio?searchtype=author&query=Wu%2C+J">Jie Wu</a>, <a href="/search/q-bio?searchtype=author&query=Lei%2C+F">Fengchun Lei</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.17331v1-abstract-short" style="display: inline;"> Many structures in science, engineering, and art can be viewed as curves in 3-space. The entanglement of these curves plays a crucial role in determining the functionality and physical properties of materials. Many concepts in knot theory provide theoretical tools to explore the complexity and entanglement of curves in 3-space. However, classical knot theory primarily focuses on global topological… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.17331v1-abstract-full').style.display = 'inline'; document.getElementById('2411.17331v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.17331v1-abstract-full" style="display: none;"> Many structures in science, engineering, and art can be viewed as curves in 3-space. The entanglement of these curves plays a crucial role in determining the functionality and physical properties of materials. Many concepts in knot theory provide theoretical tools to explore the complexity and entanglement of curves in 3-space. However, classical knot theory primarily focuses on global topological properties and lacks the consideration of local structural information, which is critical in practical applications. 
In this work, two localized models based on the Jones polynomial, namely the multiscale Jones polynomial and the persistent Jones polynomial, are proposed. The stability of these models, especially the insensitivity of the multiscale and persistent Jones polynomial models to small perturbations in curve collections, is analyzed, thus ensuring their robustness for real-world applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.17331v1-abstract-full').style.display = 'none'; document.getElementById('2411.17331v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, 9 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 57K14; 92C10 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12370">arXiv:2411.12370</a> <span> [<a href="https://arxiv.org/pdf/2411.12370">pdf</a>, <a href="https://arxiv.org/format/2411.12370">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Rapid response to fast viral evolution using AlphaFold 3-assisted topological deep learning </p> <p class="authors"> <span 
class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Wee%2C+J">JunJie Wee</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12370v1-abstract-short" style="display: inline;"> The fast evolution of SARS-CoV-2 and other infectious viruses poses a grand challenge to the rapid response in terms of viral tracking, diagnostics, and design and manufacture of monoclonal antibodies (mAbs) and vaccines, which are both time-consuming and costly. This underscores the need for efficient computational approaches. Recent advancements, like topological deep learning (TDL), have introd… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12370v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12370v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12370v1-abstract-full" style="display: none;"> The fast evolution of SARS-CoV-2 and other infectious viruses poses a grand challenge to the rapid response in terms of viral tracking, diagnostics, and design and manufacture of monoclonal antibodies (mAbs) and vaccines, which are both time-consuming and costly. This underscores the need for efficient computational approaches. Recent advancements, like topological deep learning (TDL), have introduced powerful tools for forecasting emerging dominant variants, yet they require deep mutational scanning (DMS) of viral surface proteins and associated three-dimensional (3D) protein-protein interaction (PPI) complex structures. We propose an AlphaFold 3 (AF3)-assisted multi-task topological Laplacian (MT-TopLap) strategy to address this need. 
MT-TopLap combines deep learning with topological data analysis (TDA) models, such as persistent Laplacians (PL) to extract detailed topological and geometric characteristics of PPIs, thereby enhancing the prediction of DMS and binding free energy (BFE) changes upon virus mutations. Validation with four experimental DMS datasets of SARS-CoV-2 spike receptor-binding domain (RBD) and the human angiotensin-converting enzyme-2 (ACE2) complexes indicates that our AF3 assisted MT-TopLap strategy maintains robust performance, with only an average 1.1% decrease in Pearson correlation coefficients (PCC) and an average 9.3% increase in root mean square errors (RMSE), compared with the use of experimental structures. Additionally, AF3-assisted MT-TopLap achieved a PCC of 0.81 when tested with a SARS-CoV-2 HK.3 variant DMS dataset, confirming its capability to accurately predict BFE changes and adapt to new experimental data, thereby showcasing its potential for rapid and effective response to fast viral evolution. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12370v1-abstract-full').style.display = 'none'; document.getElementById('2411.12370v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">21 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.03112">arXiv:2411.03112</a> <span> [<a href="https://arxiv.org/pdf/2411.03112">pdf</a>, <a href="https://arxiv.org/format/2411.03112">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Multiscale differential geometry learning for protein flexibility analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Feng%2C+H">Hongsong Feng</a>, <a href="/search/q-bio?searchtype=author&query=Zhao%2C+J+Y">Jeffrey Y. Zhao</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.03112v1-abstract-short" style="display: inline;"> Protein flexibility is crucial for understanding protein structures, functions, and dynamics, and it can be measured through experimental methods such as X-ray crystallography. Theoretical approaches have also been developed to predict B-factor values, which reflect protein flexibility. Previous models have made significant strides in analyzing B-factors by fitting experimental data. 
In this study… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03112v1-abstract-full').style.display = 'inline'; document.getElementById('2411.03112v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.03112v1-abstract-full" style="display: none;"> Protein flexibility is crucial for understanding protein structures, functions, and dynamics, and it can be measured through experimental methods such as X-ray crystallography. Theoretical approaches have also been developed to predict B-factor values, which reflect protein flexibility. Previous models have made significant strides in analyzing B-factors by fitting experimental data. In this study, we propose a novel approach for B-factor prediction using differential geometry theory, based on the assumption that the intrinsic properties of proteins reside on a family of low-dimensional manifolds embedded within the high-dimensional space of protein structures. By analyzing the mean and Gaussian curvatures of a set of kernel-function-defined low-dimensional manifolds, we develop effective and robust multiscale differential geometry (mDG) models. Our mDG model demonstrates a 27\% increase in accuracy compared to the classical Gaussian network model (GNM) in predicting B-factors for a dataset of 364 proteins. Additionally, by incorporating both global and local protein features, we construct a highly effective machine learning model for the blind prediction of B-factors. Extensive least-squares approximations and machine learning-based blind predictions validate the effectiveness of the mDG modeling approach for B-factor prediction. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03112v1-abstract-full').style.display = 'none'; document.getElementById('2411.03112v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.02596">arXiv:2411.02596</a> <span> [<a href="https://arxiv.org/pdf/2411.02596">pdf</a>, <a href="https://arxiv.org/format/2411.02596">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Persistent Directed Flag Laplacian (PDFL)-Based Machine Learning for Protein-Ligand Binding Affinity Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Zia%2C+M">Mushal Zia</a>, <a href="/search/q-bio?searchtype=author&query=Jones%2C+B">Benjamin Jones</a>, <a href="/search/q-bio?searchtype=author&query=Feng%2C+H">Hongsong Feng</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.02596v2-abstract-short" style="display: inline;"> Directionality in molecular and biomolecular networks plays a significant role in the accurate representation of the complex, dynamic, and asymmetrical nature of interactions present in protein-ligand binding, signal transduction, and biological pathways. 
Most traditional techniques of topological data analysis (TDA), such as persistent homology (PH) and persistent Laplacian (PL), overlook this aspe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02596v2-abstract-full').style.display = 'inline'; document.getElementById('2411.02596v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.02596v2-abstract-full" style="display: none;"> Directionality in molecular and biomolecular networks plays a significant role in the accurate representation of the complex, dynamic, and asymmetrical nature of interactions present in protein-ligand binding, signal transduction, and biological pathways. Most traditional techniques of topological data analysis (TDA), such as persistent homology (PH) and persistent Laplacian (PL), overlook this aspect in their standard form. To address this, we present the persistent directed flag Laplacian (PDFL), which incorporates directed flag complexes to account for edges with directionality originated from polarization, gene regulation, heterogeneous interactions, etc. This study marks the first application of the PDFL, providing an in-depth analysis of spectral graph theory combined with machine learning. Besides its superior accuracy and reliability, the PDFL model offers simplicity by requiring only raw inputs without complex data processing. We validated our multi-kernel PDFL model for its scoring power against other state-of-the-art methods on three popular benchmarks, namely PDBbind v2007, v2013, and v2016. Computational results indicate that the proposed PDFL model outperforms competitors in protein-ligand binding affinity predictions, indicating that PDFL is a promising tool for protein engineering, drug discovery, and general applications in science and engineering. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02596v2-abstract-full').style.display = 'none'; document.getElementById('2411.02596v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.19717">arXiv:2409.19717</a> <span> [<a href="https://arxiv.org/pdf/2409.19717">pdf</a>, <a href="https://arxiv.org/ps/2409.19717">ps</a>, <a href="https://arxiv.org/format/2409.19717">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> </div> </div> <p class="title is-5 mathjax"> Nonparametric Covariance Regression for Massive Neural Data on Restricted Covariates via Graph </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Ganchao Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.19717v1-abstract-short" style="display: inline;"> Modern recording techniques enable neuroscientists to simultaneously study neural activity across large populations of neurons, with capturing predictor-dependent correlations being a fundamental challenge in neuroscience. 
Moreover, the fact that input covariates often lie in restricted subdomains, according to experimental settings, makes inference even more challenging. To address these challeng… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19717v1-abstract-full').style.display = 'inline'; document.getElementById('2409.19717v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.19717v1-abstract-full" style="display: none;"> Modern recording techniques enable neuroscientists to simultaneously study neural activity across large populations of neurons, with capturing predictor-dependent correlations being a fundamental challenge in neuroscience. Moreover, the fact that input covariates often lie in restricted subdomains, according to experimental settings, makes inference even more challenging. To address these challenges, we propose a set of nonparametric mean-covariance regression models for high-dimensional neural activity with restricted inputs. These models reduce the dimensionality of neural responses by employing a lower-dimensional latent factor model, where both factor loadings and latent factors are predictor-dependent, to jointly model mean and covariance across covariates. The smoothness of neural activity across experimental conditions is modeled nonparametrically using two Gaussian processes (GPs), applied to both loading basis and latent factors. Additionally, to account for the covariates lying in restricted subspace, we incorporate graph information into the covariance structure. To flexibly infer the model, we use an MCMC algorithm to sample from posterior distributions. After validating and studying the properties of proposed methods by simulations, we apply them to two neural datasets (local field potential and neural spiking data) to demonstrate the usage of models for continuous and counting observations. 
Overall, the proposed methods provide a framework to jointly model covariate-dependent mean and covariance in high dimensional neural data, especially when the covariates lie in restricted domains. The framework is general and can be easily adapted to various applications beyond neuroscience. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19717v1-abstract-full').style.display = 'none'; document.getElementById('2409.19717v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.13299">arXiv:2408.13299</a> <span> [<a href="https://arxiv.org/pdf/2408.13299">pdf</a>, <a href="https://arxiv.org/format/2408.13299">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Algebraic Topology">math.AT</span> </div> </div> <p class="title is-5 mathjax"> Mayer-homology learning prediction of protein-ligand binding affinities </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Feng%2C+H">Hongsong Feng</a>, <a href="/search/q-bio?searchtype=author&query=Shen%2C+L">Li Shen</a>, <a href="/search/q-bio?searchtype=author&query=Liu%2C+J">Jian Liu</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.13299v1-abstract-short" style="display: inline;"> 
Artificial intelligence-assisted drug design is revolutionizing the pharmaceutical industry. Effective molecular features are crucial for accurate machine learning predictions, and advanced mathematics plays a key role in designing these features. Persistent homology theory, which equips topological invariants with persistence, provides valuable insights into molecular structures. The calculation… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.13299v1-abstract-full').style.display = 'inline'; document.getElementById('2408.13299v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.13299v1-abstract-full" style="display: none;"> Artificial intelligence-assisted drug design is revolutionizing the pharmaceutical industry. Effective molecular features are crucial for accurate machine learning predictions, and advanced mathematics plays a key role in designing these features. Persistent homology theory, which equips topological invariants with persistence, provides valuable insights into molecular structures. The calculation of Betti numbers is based on differential that typically satisfy $d^2 = 0$. Our recent work has extended this concept by employing Mayer homology with a generalized differential that satisfies $d^N = 0$ for $N \geq 2$, leading to the development of persistent Mayer homology (PMH) theory and richer topological information across various scales. In this study, we utilize PMH to create a novel multiscale topological vectorization for molecular representation, offering valuable tools for descriptive and predictive analysis in molecular data and machine learning prediction. Specifically, benchmark tests on established protein-ligand datasets, including PDBbind-2007, PDBbind-2013, and PDBbind-2016, demonstrate the superior performance of our Mayer homology models in predicting protein-ligand binding affinities. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.13299v1-abstract-full').style.display = 'none'; document.getElementById('2408.13299v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.06479">arXiv:2406.06479</a> <span> [<a href="https://arxiv.org/pdf/2406.06479">pdf</a>, <a href="https://arxiv.org/format/2406.06479">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Graph-Based Bidirectional Transformer Decision Threshold Adjustment Algorithm for Class-Imbalanced Molecular Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Hayes%2C+N">Nicole Hayes</a>, <a href="/search/q-bio?searchtype=author&query=Merkurjev%2C+E">Ekaterina Merkurjev</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.06479v3-abstract-short" style="display: inline;"> Data sets with imbalanced class sizes, where one class size is much smaller than that of others, occur exceedingly often in many applications, including those with biological foundations, such as disease diagnosis and drug discovery. 
Therefore, it is extremely important to be able to identify data elements of classes of various sizes, as a failure to do so can result in heavy costs. Nonetheless, m… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.06479v3-abstract-full').style.display = 'inline'; document.getElementById('2406.06479v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.06479v3-abstract-full" style="display: none;"> Data sets with imbalanced class sizes, where one class size is much smaller than that of others, occur exceedingly often in many applications, including those with biological foundations, such as disease diagnosis and drug discovery. Therefore, it is extremely important to be able to identify data elements of classes of various sizes, as a failure to do so can result in heavy costs. Nonetheless, many data classification procedures do not perform well on imbalanced data sets as they often fail to detect elements belonging to underrepresented classes. In this work, we propose the BTDT-MBO algorithm, incorporating Merriman-Bence-Osher (MBO) approaches and a bidirectional transformer, as well as distance correlation and decision threshold adjustments, for data classification tasks on highly imbalanced molecular data sets, where the sizes of the classes vary greatly. The proposed technique not only integrates adjustments in the classification threshold for the MBO algorithm in order to help deal with the class imbalance, but also uses a bidirectional transformer procedure based on an attention mechanism for self-supervised learning. In addition, the model implements distance correlation as a weight function for the similarity graph-based framework on which the adjusted MBO algorithm operates. The proposed method is validated using six molecular data sets and compared to other related techniques. 
The computational experiments show that the proposed technique is superior to competing approaches even in the case of a high class imbalance ratio. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.06479v3-abstract-full').style.display = 'none'; document.getElementById('2406.06479v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.03979">arXiv:2406.03979</a> <span> [<a href="https://arxiv.org/pdf/2406.03979">pdf</a>, <a href="https://arxiv.org/format/2406.03979">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Algebraic Topology">math.AT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Benchmarking AlphaFold3's protein-protein complex accuracy and machine learning prediction reliability for binding free energy changes upon mutation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Wee%2C+J">JunJie Wee</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.03979v1-abstract-short" style="display: inline;"> AlphaFold 3 (AF3), 
the latest version of protein structure prediction software, goes beyond its predecessors by predicting protein-protein complexes. It could revolutionize drug discovery and protein engineering, marking a major step towards comprehensive, automated protein structure prediction. However, independent validation of AF3's predictions is necessary. Evaluated using the SKEMPI 2.0 datab… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.03979v1-abstract-full').style.display = 'inline'; document.getElementById('2406.03979v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.03979v1-abstract-full" style="display: none;"> AlphaFold 3 (AF3), the latest version of protein structure prediction software, goes beyond its predecessors by predicting protein-protein complexes. It could revolutionize drug discovery and protein engineering, marking a major step towards comprehensive, automated protein structure prediction. However, independent validation of AF3's predictions is necessary. Evaluated using the SKEMPI 2.0 database which involves 317 protein-protein complexes and 8338 mutations, AF3 complex structures give rise to a very good Pearson correlation coefficient of 0.86 for predicting protein-protein binding free energy changes upon mutation, slightly less than the 0.88 achieved earlier with the Protein Data Bank (PDB) structures. Nonetheless, AF3 complex structures led to a 8.6% increase in the prediction RMSE compared to original PDB complex structures. Additionally, some of AF3's complex structures have large errors, which were not captured in its ipTM performance metric. Finally, it is found that AF3's complex structures are not reliable for intrinsically flexible regions or domains. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.03979v1-abstract-full').style.display = 'none'; document.getElementById('2406.03979v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.02603">arXiv:2403.02603</a> <span> [<a href="https://arxiv.org/pdf/2403.02603">pdf</a>, <a href="https://arxiv.org/format/2403.02603">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Drug resistance revealed by in silico deep mutational scanning and mutation tracker </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Chen%2C+D">Dong Chen</a>, <a href="/search/q-bio?searchtype=author&query=Liu%2C+G">Gengzhuo Liu</a>, <a href="/search/q-bio?searchtype=author&query=Du%2C+H">Hongyan Du</a>, <a href="/search/q-bio?searchtype=author&query=Wee%2C+J">Junjie Wee</a>, <a href="/search/q-bio?searchtype=author&query=Wang%2C+R">Rui Wang</a>, <a href="/search/q-bio?searchtype=author&query=Chen%2C+J">Jiahui Chen</a>, <a href="/search/q-bio?searchtype=author&query=Shen%2C+J">Jana Shen</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.02603v1-abstract-short" style="display: inline;"> As COVID-19 enters its fifth year, it continues to pose a significant global health threat, with the 
constantly mutating SARS-CoV-2 virus challenging drug effectiveness. A comprehensive understanding of virus-drug interactions is essential for predicting and improving drug effectiveness, especially in combating drug resistance during the pandemic. In response, the Path Laplacian Transformer-based… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.02603v1-abstract-full').style.display = 'inline'; document.getElementById('2403.02603v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.02603v1-abstract-full" style="display: none;"> As COVID-19 enters its fifth year, it continues to pose a significant global health threat, with the constantly mutating SARS-CoV-2 virus challenging drug effectiveness. A comprehensive understanding of virus-drug interactions is essential for predicting and improving drug effectiveness, especially in combating drug resistance during the pandemic. In response, the Path Laplacian Transformer-based Prospective Analysis Framework (PLFormer-PAF) has been proposed, integrating historical data analysis and predictive modeling strategies. This dual-strategy approach utilizes path topology to transform protein-ligand complexes into topological sequences, enabling the use of advanced large language models for analyzing protein-ligand interactions and enhancing its reliability with factual insights garnered from historical data. It has shown unparalleled performance in predicting binding affinity tasks across various benchmarks, including specific evaluations related to SARS-CoV-2, and assesses the impact of virus mutations on drug efficacy, offering crucial insights into potential drug resistance. The predictions align with observed mutation patterns in SARS-CoV-2, indicating that the widespread use of the Pfizer drug has led to viral evolution and reduced drug efficacy. 
PLFormer-PAF's capabilities extend beyond identifying drug-resistant strains, positioning it as a key tool in drug discovery research and the development of new therapeutic strategies against fast-mutating viruses like COVID-19. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.02603v1-abstract-full').style.display = 'none'; document.getElementById('2403.02603v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.10261">arXiv:2312.10261</a> <span> [<a href="https://arxiv.org/pdf/2312.10261">pdf</a>, <a href="https://arxiv.org/format/2312.10261">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Molecular Networks">q-bio.MN</span> </div> </div> <p class="title is-5 mathjax"> Multiscale differential geometry learning of networks with applications to single-cell RNA sequencing data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Feng%2C+H">Hongsong Feng</a>, <a href="/search/q-bio?searchtype=author&query=Cottrell%2C+S">Sean Cottrell</a>, <a href="/search/q-bio?searchtype=author&query=Hozumi%2C+Y">Yuta Hozumi</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.10261v1-abstract-short" style="display: inline;"> Single-cell RNA sequencing (scRNA-seq) has emerged as a transformative technology, offering unparalleled insights into the 
intricate landscape of cellular diversity and gene expression dynamics. The analysis of scRNA-seq data poses challenges attributed to both sparsity and the extensive number of genes implicated. An increasing number of computational tools are devised for analyzing and interpret… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.10261v1-abstract-full').style.display = 'inline'; document.getElementById('2312.10261v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.10261v1-abstract-full" style="display: none;"> Single-cell RNA sequencing (scRNA-seq) has emerged as a transformative technology, offering unparalleled insights into the intricate landscape of cellular diversity and gene expression dynamics. The analysis of scRNA-seq data poses challenges attributed to both sparsity and the extensive number of genes implicated. An increasing number of computational tools are devised for analyzing and interpreting scRNA-seq data. We present a multiscale differential geometry (MDG) strategy to exploit the geometric and biological properties inherent in scRNA-seq data. We assume that those intrinsic properties of cells lie on a family of low-dimensional manifolds embedded in the high-dimensional space of scRNA-seq data. Subsequently, we explore these properties via multiscale cell-cell interactive manifolds. Our multiscale curvature-based representation serves as a powerful approach to effectively encapsulate the complex relationships in the cell-cell network. We showcase the utility of our novel approach by demonstrating its effectiveness in classifying cell types. This innovative application of differential geometry in scRNA-seq analysis opens new avenues for understanding the intricacies of biological networks and holds great potential for network analysis in other fields. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.10261v1-abstract-full').style.display = 'none'; document.getElementById('2312.10261v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.01272">arXiv:2312.01272</a> <span> [<a href="https://arxiv.org/pdf/2312.01272">pdf</a>, <a href="https://arxiv.org/format/2312.01272">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> </div> <p class="title is-5 mathjax"> Multiscale Topology in Interactomic Network: From Transcriptome to Antiaddiction Drug Repurposing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Du%2C+H">Hongyan Du</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a>, <a href="/search/q-bio?searchtype=author&query=Hou%2C+T">Tingjun Hou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.01272v1-abstract-short" style="display: inline;"> The escalating drug addiction crisis in the United States underscores the urgent need for innovative therapeutic strategies. 
This study embarked on an innovative and rigorous strategy to unearth potential drug repurposing candidates for opioid and cocaine addiction treatment, bridging the gap between transcriptomic data analysis and drug discovery. We initiated our approach by conducting different… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.01272v1-abstract-full').style.display = 'inline'; document.getElementById('2312.01272v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.01272v1-abstract-full" style="display: none;"> The escalating drug addiction crisis in the United States underscores the urgent need for innovative therapeutic strategies. This study embarked on an innovative and rigorous strategy to unearth potential drug repurposing candidates for opioid and cocaine addiction treatment, bridging the gap between transcriptomic data analysis and drug discovery. We initiated our approach by conducting differential gene expression analysis on addiction-related transcriptomic data to identify key genes. We propose a novel topological differentiation to identify key genes from a protein-protein interaction (PPI) network derived from DEGs. This method utilizes persistent Laplacians to accurately single out pivotal nodes within the network, conducting this analysis in a multiscale manner to ensure high reliability. Through rigorous literature validation, pathway analysis, and data-availability scrutiny, we identified three pivotal molecular targets, mTOR, mGluR5, and NMDAR, for drug repurposing from DrugBank. We crafted machine learning models employing two natural language processing (NLP)-based embeddings and a traditional 2D fingerprint, which demonstrated robust predictive ability in gauging binding affinities of DrugBank compounds to selected targets. Furthermore, we elucidated the interactions of promising drugs with the targets and evaluated their drug-likeness. 
This study delineates a multi-faceted and comprehensive analytical framework, amalgamating bioinformatics, topological data analysis and machine learning, for drug repurposing in addiction treatment, setting the stage for subsequent experimental validation. The versatility of the methods we developed allows for applications across a range of diseases and transcriptomic datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.01272v1-abstract-full').style.display = 'none'; document.getElementById('2312.01272v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.12834">arXiv:2311.12834</a> <span> [<a href="https://arxiv.org/pdf/2311.12834">pdf</a>, <a href="https://arxiv.org/format/2311.12834">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Geometric Topology">math.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Knot data analysis using multiscale Gauss link integral </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Shen%2C+L">Li Shen</a>, <a href="/search/q-bio?searchtype=author&query=Feng%2C+H">Hongsong Feng</a>, <a href="/search/q-bio?searchtype=author&query=Li%2C+F">Fengling Li</a>, <a href="/search/q-bio?searchtype=author&query=Lei%2C+F">Fengchun Lei</a>, <a href="/search/q-bio?searchtype=author&query=Wu%2C+J">Jie Wu</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p 
class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.12834v1-abstract-short" style="display: inline;"> In the past decade, topological data analysis (TDA) has emerged as a powerful approach in data science. The main technique in TDA is persistent homology, which tracks topological invariants over the filtration of point cloud data using algebraic topology. Although knot theory and related subjects are a focus of study in mathematics, their success in practical applications is quite limited due to t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.12834v1-abstract-full').style.display = 'inline'; document.getElementById('2311.12834v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.12834v1-abstract-full" style="display: none;"> In the past decade, topological data analysis (TDA) has emerged as a powerful approach in data science. The main technique in TDA is persistent homology, which tracks topological invariants over the filtration of point cloud data using algebraic topology. Although knot theory and related subjects are a focus of study in mathematics, their success in practical applications is quite limited due to the lack of localization and quantization. We address these challenges by introducing knot data analysis (KDA), a new paradigm that incorporates curve segmentation and multiscale analysis into the Gauss link integral. The resulting multiscale Gauss link integral (mGLI) recovers the global topological properties of knots and links at an appropriate scale but offers multiscale feature vectors to capture the local structures and connectivities of each curve segment at various scales. 
The proposed mGLI significantly outperforms other state-of-the-art methods in benchmark protein flexibility analysis, including earlier persistent homology-based methods. Our approach enables the integration of artificial intelligence (AI) and KDA for general curve-like objects and data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.12834v1-abstract-full').style.display = 'none'; document.getElementById('2311.12834v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.18760">arXiv:2310.18760</a> <span> [<a href="https://arxiv.org/pdf/2310.18760">pdf</a>, <a href="https://arxiv.org/format/2310.18760">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Algebraic Topology">math.AT</span> </div> </div> <p class="title is-5 mathjax"> Integration of persistent Laplacian and pre-trained transformer for protein solubility changes upon mutation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Wee%2C+J">JunJie Wee</a>, <a href="/search/q-bio?searchtype=author&query=Chen%2C+J">Jiahui Chen</a>, <a href="/search/q-bio?searchtype=author&query=Xia%2C+K">Kelin Xia</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.18760v2-abstract-short" 
style="display: inline;"> Protein mutations can significantly influence protein solubility, which results in altered protein functions and leads to various diseases. Despite tremendous effort, machine learning prediction of protein solubility changes upon mutation remains a challenging task as indicated by the poor scores of normalized Correct Prediction Ratio (CPR). Part of the challenge stems from the fact that there… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.18760v2-abstract-full').style.display = 'inline'; document.getElementById('2310.18760v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.18760v2-abstract-full" style="display: none;"> Protein mutations can significantly influence protein solubility, which results in altered protein functions and leads to various diseases. Despite tremendous effort, machine learning prediction of protein solubility changes upon mutation remains a challenging task as indicated by the poor scores of normalized Correct Prediction Ratio (CPR). Part of the challenge stems from the fact that there are no three-dimensional (3D) structures for the wild-type and mutant proteins. This work integrates persistent Laplacians and pre-trained Transformer for the task. The Transformer, pretrained with hundreds of millions of protein sequences, embeds wild-type and mutant sequences, while persistent Laplacians track the topological invariant change and homotopic shape evolution induced by mutations in 3D protein structures, which are rendered from AlphaFold2. The resulting machine learning model was trained on an extensive data set labeled with three solubility types. Our model outperforms all existing predictive methods and improves the state-of-the-art up to 15%. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.18760v2-abstract-full').style.display = 'none'; document.getElementById('2310.18760v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.14521">arXiv:2310.14521</a> <span> [<a href="https://arxiv.org/pdf/2310.14521">pdf</a>, <a href="https://arxiv.org/format/2310.14521">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Algebraic Topology">math.AT</span> </div> </div> <p class="title is-5 mathjax"> K-Nearest-Neighbors Induced Topological PCA for scRNA Sequence Data Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Cottrell%2C+S">Sean Cottrell</a>, <a href="/search/q-bio?searchtype=author&query=Hozumi%2C+Y">Yuta Hozumi</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.14521v1-abstract-short" style="display: inline;"> Single-cell RNA sequencing (scRNA-seq) is widely used to reveal heterogeneity in cells, which has given us insights into cell-cell communication, cell 
differentiation, and differential gene expression. However, analyzing scRNA-seq data is a challenge due to sparsity and the large number of genes involved. Therefore, dimensionality reduction and feature selection are important for removing spurious… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.14521v1-abstract-full').style.display = 'inline'; document.getElementById('2310.14521v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.14521v1-abstract-full" style="display: none;"> Single-cell RNA sequencing (scRNA-seq) is widely used to reveal heterogeneity in cells, which has given us insights into cell-cell communication, cell differentiation, and differential gene expression. However, analyzing scRNA-seq data is a challenge due to sparsity and the large number of genes involved. Therefore, dimensionality reduction and feature selection are important for removing spurious signals and enhancing downstream analysis. Traditional PCA, a main workhorse in dimensionality reduction, lacks the ability to capture geometrical structure information embedded in the data, and previous graph Laplacian regularizations are limited by the analysis of only a single scale. We propose a topological Principal Components Analysis (tPCA) method by the combination of persistent Laplacian (PL) technique and L$_{2,1}$ norm regularization to address multiscale and multiclass heterogeneity issues in data. We further introduce a k-Nearest-Neighbor (kNN) persistent Laplacian technique to improve the robustness of our persistent Laplacian method. The proposed kNN-PL is a new algebraic topology technique which addresses the many limitations of the traditional persistent homology. 
Rather than inducing filtration via the varying of a distance threshold, we introduced kNN-tPCA, where filtrations are achieved by varying the number of neighbors in a kNN network at each step, and find that this framework has significant implications for hyper-parameter tuning. We validate the efficacy of our proposed tPCA and kNN-tPCA methods on 11 diverse benchmark scRNA-seq datasets, and showcase that our methods outperform other unsupervised PCA enhancements from the literature, as well as popular Uniform Manifold Approximation (UMAP), t-Distributed Stochastic Neighbor Embedding (tSNE), and Projection Non-Negative Matrix Factorization (NMF) by significant margins. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.14521v1-abstract-full').style.display = 'none'; document.getElementById('2310.14521v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">28 pages, 11 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.02213">arXiv:2309.02213</a> <span> [<a href="https://arxiv.org/pdf/2309.02213">pdf</a>, <a href="https://arxiv.org/ps/2309.02213">ps</a>, <a href="https://arxiv.org/format/2309.02213">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> </div> </div> <p class="title is-5 mathjax"> Bayesian Bi-clustering of Neural Spiking Activity with Latent Structures </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Ganchao Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.02213v3-abstract-short" style="display: inline;"> Modern neural recording techniques allow neuroscientists to obtain spiking activity of multiple neurons from different brain regions over long time periods, which requires new statistical methods to be developed for understanding structure of the large-scale data. 
In this paper, we develop a bi-clustering method to cluster the neural spiking activity spatially and temporally, according to their lo… <!-- These abstract toggles are anchors without href; role="button", tabindex="0" and the keydown handler make them focusable and operable with Enter/Space. --><a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2309.02213v3-abstract-full').style.display = 'inline'; document.getElementById('2309.02213v3-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.02213v3-abstract-full" style="display: none;"> Modern neural recording techniques allow neuroscientists to obtain spiking activity of multiple neurons from different brain regions over long time periods, which requires new statistical methods to be developed for understanding structure of the large-scale data. In this paper, we develop a bi-clustering method to cluster the neural spiking activity spatially and temporally, according to their low-dimensional latent structures. The spatial (neuron) clusters are defined by the latent trajectories within each neural population, while the temporal (state) clusters are defined by (populationally) synchronous local linear dynamics shared with different periods. To flexibly extract the bi-clustering structure, we build the model non-parametrically, and develop an efficient Markov chain Monte Carlo (MCMC) algorithm to sample the posterior distributions of model parameters. Validating our proposed MCMC algorithm through simulations, we find the method can recover unknown parameters and true bi-clustering structures successfully. We then apply the proposed bi-clustering method to multi-regional neural recordings under different experiment settings, where we find that simultaneously considering latent trajectories and spatial-temporal clustering structures can provide us with a more accurate and interpretable result. 
Overall, the proposed method provides scientific insights for large-scale (counting) time series with elongated recording periods, and it can potentially have application beyond neuroscience. <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2309.02213v3-abstract-full').style.display = 'none'; document.getElementById('2309.02213v3-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.06920">arXiv:2308.06920</a> <span> [<a href="https://arxiv.org/pdf/2308.06920">pdf</a>, <a href="https://arxiv.org/format/2308.06920">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> ChatGPT in Drug Discovery: A Case Study on Anti-Cocaine Addiction Drug Development with Chatbots </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Wang%2C+R">Rui Wang</a>, <a href="/search/q-bio?searchtype=author&query=Feng%2C+H">Hongsong Feng</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark 
mathjax" id="2308.06920v2-abstract-short" style="display: inline;"> The birth of ChatGPT, a cutting-edge language model-based chatbot developed by OpenAI, ushered in a new era in AI. However, due to potential pitfalls, its role in rigorous scientific research is not clear yet. This paper vividly showcases its innovative application within the field of drug discovery. Focused specifically on developing anti-cocaine addiction drugs, the study employs GPT-4 as a virt… <!-- These abstract toggles are anchors without href; role="button", tabindex="0" and the keydown handler make them focusable and operable with Enter/Space. --><a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2308.06920v2-abstract-full').style.display = 'inline'; document.getElementById('2308.06920v2-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.06920v2-abstract-full" style="display: none;"> The birth of ChatGPT, a cutting-edge language model-based chatbot developed by OpenAI, ushered in a new era in AI. However, due to potential pitfalls, its role in rigorous scientific research is not clear yet. This paper vividly showcases its innovative application within the field of drug discovery. Focused specifically on developing anti-cocaine addiction drugs, the study employs GPT-4 as a virtual guide, offering strategic and methodological insights to researchers working on generative models for drug candidates. The primary objective is to generate optimal drug-like molecules with desired properties. By leveraging the capabilities of ChatGPT, the study introduces a novel approach to the drug discovery process. This symbiotic partnership between AI and researchers transforms how drug development is approached. Chatbots become facilitators, steering researchers towards innovative methodologies and productive paths for creating effective drug candidates. This research sheds light on the collaborative synergy between human expertise and AI assistance, wherein ChatGPT's cognitive abilities enhance the design and development of potential pharmaceutical solutions. 
This paper not only explores the integration of advanced AI in drug discovery but also reimagines the landscape by advocating for AI-powered chatbots as trailblazers in revolutionizing therapeutic innovation. <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2308.06920v2-abstract-full').style.display = 'none'; document.getElementById('2308.06920v2-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.14587">arXiv:2307.14587</a> <span> [<a href="https://arxiv.org/pdf/2307.14587">pdf</a>, <a href="https://arxiv.org/format/2307.14587">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Artificial intelligence-aided protein engineering: from topological data analysis to deep protein language models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Qiu%2C+Y">Yuchi Qiu</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.14587v1-abstract-short" style="display: inline;"> Protein engineering is an emerging field in biotechnology that has the potential to revolutionize various areas, such as antibody design, drug discovery, food security, ecology, and more. 
However, the mutational space involved is too vast to be handled through experimental means alone. Leveraging accumulative protein databases, machine learning (ML) models, particularly those based on natural lang… <!-- These abstract toggles are anchors without href; role="button", tabindex="0" and the keydown handler make them focusable and operable with Enter/Space. --><a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2307.14587v1-abstract-full').style.display = 'inline'; document.getElementById('2307.14587v1-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.14587v1-abstract-full" style="display: none;"> Protein engineering is an emerging field in biotechnology that has the potential to revolutionize various areas, such as antibody design, drug discovery, food security, ecology, and more. However, the mutational space involved is too vast to be handled through experimental means alone. Leveraging accumulative protein databases, machine learning (ML) models, particularly those based on natural language processing (NLP), have considerably expedited protein engineering. Moreover, advances in topological data analysis (TDA) and artificial intelligence-based protein structure prediction, such as AlphaFold2, have made more powerful structure-based ML-assisted protein engineering strategies possible. This review aims to offer a comprehensive, systematic, and indispensable set of methodological components, including TDA and NLP, for protein engineering and to facilitate their future development. <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2307.14587v1-abstract-full').style.display = 'none'; document.getElementById('2307.14587v1-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.05794">arXiv:2307.05794</a> <span> [<a href="https://arxiv.org/pdf/2307.05794">pdf</a>, <a href="https://arxiv.org/format/2307.05794">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Machine Learning Study of the Extended Drug-target Interaction Network informed by Pain Related Voltage-Gated Sodium Channels </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Chen%2C+L">Long Chen</a>, <a href="/search/q-bio?searchtype=author&query=Jiang%2C+J">Jian Jiang</a>, <a href="/search/q-bio?searchtype=author&query=Dou%2C+B">Bozheng Dou</a>, <a href="/search/q-bio?searchtype=author&query=Feng%2C+H">Hongsong Feng</a>, <a href="/search/q-bio?searchtype=author&query=Liu%2C+J">Jie Liu</a>, <a href="/search/q-bio?searchtype=author&query=Zhu%2C+Y">Yueying Zhu</a>, <a href="/search/q-bio?searchtype=author&query=Zhang%2C+B">Bengong Zhang</a>, <a href="/search/q-bio?searchtype=author&query=Zhou%2C+T">Tianshou Zhou</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.05794v1-abstract-short" style="display: inline;"> Pain is a significant global health issue, and the current treatment options for pain management have limitations in terms of effectiveness, side effects, and potential for addiction. There is a pressing need for improved pain treatments and the development of new drugs. 
Voltage-gated sodium channels, particularly Nav1.3, Nav1.7, Nav1.8, and Nav1.9, play a crucial role in neuronal excitability and… <!-- These abstract toggles are anchors without href; role="button", tabindex="0" and the keydown handler make them focusable and operable with Enter/Space. --><a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2307.05794v1-abstract-full').style.display = 'inline'; document.getElementById('2307.05794v1-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.05794v1-abstract-full" style="display: none;"> Pain is a significant global health issue, and the current treatment options for pain management have limitations in terms of effectiveness, side effects, and potential for addiction. There is a pressing need for improved pain treatments and the development of new drugs. Voltage-gated sodium channels, particularly Nav1.3, Nav1.7, Nav1.8, and Nav1.9, play a crucial role in neuronal excitability and are predominantly expressed in the peripheral nervous system. Targeting these channels may provide a means to treat pain while minimizing central and cardiac adverse effects. In this study, we construct protein-protein interaction (PPI) networks based on pain-related sodium channels and develop a corresponding drug-target interaction (DTI) network to identify potential lead compounds for pain management. To ensure reliable machine learning predictions, we carefully select 111 inhibitor datasets from a pool of over 1,000 targets in the PPI network. We employ three distinct machine learning algorithms combined with advanced natural language processing (NLP)-based embeddings, specifically pre-trained transformer and autoencoder representations. Through a systematic screening process, we evaluate the side effects and repurposing potential of over 150,000 drug candidates targeting Nav1.7 and Nav1.8 sodium channels. Additionally, we assess the ADMET (absorption, distribution, metabolism, excretion, and toxicity) properties of these candidates to identify leads with near-optimal characteristics. 
Our strategy provides an innovative platform for the pharmacological development of pain treatments, offering the potential for improved efficacy and reduced side effects. <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2307.05794v1-abstract-full').style.display = 'none'; document.getElementById('2307.05794v1-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.07484">arXiv:2306.07484</a> <span> [<a href="https://arxiv.org/pdf/2306.07484">pdf</a>, <a href="https://arxiv.org/format/2306.07484">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Multi-objective Molecular Optimization for Opioid Use Disorder Treatment Using Generative Network Complex </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Feng%2C+H">Hongsong Feng</a>, <a href="/search/q-bio?searchtype=author&query=Wang%2C+R">Rui Wang</a>, <a href="/search/q-bio?searchtype=author&query=Zhan%2C+C">Chang-Guo Zhan</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.07484v1-abstract-short" style="display: inline;"> Opioid Use Disorder (OUD) has emerged as a significant global public health issue, with complex 
multifaceted conditions. Due to the lack of effective treatment options for various conditions, there is a pressing need for the discovery of new medications. In this study, we propose a deep generative model that combines a stochastic differential equation (SDE)-based diffusion modeling with the latent… <!-- These abstract toggles are anchors without href; role="button", tabindex="0" and the keydown handler make them focusable and operable with Enter/Space. --><a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2306.07484v1-abstract-full').style.display = 'inline'; document.getElementById('2306.07484v1-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.07484v1-abstract-full" style="display: none;"> Opioid Use Disorder (OUD) has emerged as a significant global public health issue, with complex multifaceted conditions. Due to the lack of effective treatment options for various conditions, there is a pressing need for the discovery of new medications. In this study, we propose a deep generative model that combines a stochastic differential equation (SDE)-based diffusion modeling with the latent space of a pretrained autoencoder model. The molecular generator enables efficient generation of molecules that are effective on multiple targets, specifically the mu, kappa, and delta opioid receptors. Furthermore, we assess the ADMET (absorption, distribution, metabolism, excretion, and toxicity) properties of the generated molecules to identify drug-like compounds. To enhance the pharmacokinetic properties of some lead compounds, we employ a molecular optimization approach. We obtain a diverse set of drug-like molecules. We construct binding affinity predictors by integrating molecular fingerprints derived from autoencoder embeddings, transformer embeddings, and topological Laplacians with advanced machine learning algorithms. Further experimental studies are needed to evaluate the pharmacological effects of these drug-like compounds for OUD treatment. 
Our machine learning platform serves as a valuable tool in designing and optimizing effective molecules for addressing OUD. <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2306.07484v1-abstract-full').style.display = 'none'; document.getElementById('2306.07484v1-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.04658">arXiv:2306.04658</a> <span> [<a href="https://arxiv.org/pdf/2306.04658">pdf</a>, <a href="https://arxiv.org/format/2306.04658">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Mathematics-assisted directed evolution and protein engineering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Qiu%2C+Y">Yuchi Qiu</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.04658v1-abstract-short" style="display: inline;"> Directed evolution is a molecular biology technique that is transforming protein engineering by creating proteins with desirable properties and functions. 
However, it is experimentally impossible to perform the deep mutational scanning of the entire protein library due to the enormous mutational space, which scales as $20^N$ , where N is the number of amino acids. This has led to the rapid growth… <!-- These abstract toggles are anchors without href; role="button", tabindex="0" and the keydown handler make them focusable and operable with Enter/Space. --><a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2306.04658v1-abstract-full').style.display = 'inline'; document.getElementById('2306.04658v1-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.04658v1-abstract-full" style="display: none;"> Directed evolution is a molecular biology technique that is transforming protein engineering by creating proteins with desirable properties and functions. However, it is experimentally impossible to perform the deep mutational scanning of the entire protein library due to the enormous mutational space, which scales as $20^N$ , where N is the number of amino acids. This has led to the rapid growth of AI-assisted directed evolution (AIDE) or AI-assisted protein engineering (AIPE) as an emerging research field. Aided with advanced natural language processing (NLP) techniques, including long short-term memory, autoencoder, and transformer, sequence-based embeddings have been dominant approaches in AIDE and AIPE. Persistent Laplacians, an emerging technique in topological data analysis (TDA), have made structure-based embeddings a superb option in AIDE and AIPE. We argue that a class of persistent topological Laplacians (PTLs), including persistent Laplacians, persistent path Laplacians, persistent sheaf Laplacians, persistent hypergraph Laplacians, persistent hyperdigraph Laplacians, and evolutionary de Rham-Hodge theory, can effectively overcome the limitations of the current TDA and offer a new generation of more powerful TDA approaches. 
In the general framework of topological deep learning, mathematics-assisted directed evolution (MADE) has a great potential for future protein engineering. <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2306.04658v1-abstract-full').style.display = 'none'; document.getElementById('2306.04658v1-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.00240">arXiv:2303.00240</a> <span> [<a href="https://arxiv.org/pdf/2303.00240">pdf</a>, <a href="https://arxiv.org/format/2303.00240">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Machine-learning Repurposing of DrugBank Compounds for Opioid Use Disorder </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Feng%2C+H">Hongsong Feng</a>, <a href="/search/q-bio?searchtype=author&query=Jiang%2C+J">Jian Jiang</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.00240v1-abstract-short" style="display: inline;"> Opioid use disorder (OUD) is a chronic and relapsing condition that involves the continued and compulsive use of opioids despite harmful consequences. The development of medications with improved efficacy and safety profiles for OUD treatment is urgently needed. 
Drug repurposing is a promising option for drug discovery due to its reduced cost and expedited approval procedures. Computational approa… <!-- These abstract toggles are anchors without href; role="button", tabindex="0" and the keydown handler make them focusable and operable with Enter/Space. --><a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2303.00240v1-abstract-full').style.display = 'inline'; document.getElementById('2303.00240v1-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.00240v1-abstract-full" style="display: none;"> Opioid use disorder (OUD) is a chronic and relapsing condition that involves the continued and compulsive use of opioids despite harmful consequences. The development of medications with improved efficacy and safety profiles for OUD treatment is urgently needed. Drug repurposing is a promising option for drug discovery due to its reduced cost and expedited approval procedures. Computational approaches based on machine learning enable the rapid screening of DrugBank compounds, identifying those with the potential to be repurposed for OUD treatment. We collected inhibitor data for four major opioid receptors and used advanced machine learning predictors of binding affinity that fuse the gradient boosting decision tree algorithm with two natural language processing (NLP)-based molecular fingerprints and one traditional 2D fingerprint. Using these predictors, we systematically analyzed the binding affinities of DrugBank compounds on four opioid receptors. Based on our machine learning predictions, we were able to discriminate DrugBank compounds with various binding affinity thresholds and selectivities for different receptors. The prediction results were further analyzed for ADMET (absorption, distribution, metabolism, excretion, and toxicity), which provided guidance on repurposing DrugBank compounds for the inhibition of selected opioid receptors. The pharmacological effects of these compounds for OUD treatment need to be tested in further experimental studies and clinical trials. 
Our machine learning studies provide a valuable platform for drug discovery in the context of OUD treatment. <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2303.00240v1-abstract-full').style.display = 'none'; document.getElementById('2303.00240v1-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2301.10865">arXiv:2301.10865</a> <span> [<a href="https://arxiv.org/pdf/2301.10865">pdf</a>, <a href="https://arxiv.org/format/2301.10865">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Persistent topological Laplacian analysis of SARS-CoV-2 variants </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Wei%2C+X">Xiaoqi Wei</a>, <a href="/search/q-bio?searchtype=author&query=Chen%2C+J">Jiahui Chen</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2301.10865v2-abstract-short" style="display: inline;"> Topological data analysis (TDA) is an emerging field in mathematics and data science. Its central technique, persistent homology, has had tremendous success in many science and engineering disciplines. However, persistent homology has limitations, including its incapability of describing the homotopic shape evolution of data during filtration. 
Persistent topological Laplacians (PTLs), such as pers… <!-- These abstract toggles are anchors without href; role="button", tabindex="0" and the keydown handler make them focusable and operable with Enter/Space. --><a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2301.10865v2-abstract-full').style.display = 'inline'; document.getElementById('2301.10865v2-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2301.10865v2-abstract-full" style="display: none;"> Topological data analysis (TDA) is an emerging field in mathematics and data science. Its central technique, persistent homology, has had tremendous success in many science and engineering disciplines. However, persistent homology has limitations, including its incapability of describing the homotopic shape evolution of data during filtration. Persistent topological Laplacians (PTLs), such as persistent Laplacian and persistent sheaf Laplacian, were proposed to overcome the drawback of persistent homology. In this work, we examine the modeling and analysis power of PTLs in the study of the protein structures of the severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) spike receptor binding domain (RBD) and its variants, i.e., Alpha, Beta, Gamma, BA.1, and BA.2. First, we employ PTLs to study how the RBD mutation-induced structural changes of RBD-angiotensin-converting enzyme 2 (ACE2) binding complexes are captured in the changes of spectra of the PTLs among SARS-CoV-2 variants. Additionally, we use PTLs to analyze the binding of RBD and ACE2-induced structural changes of various SARS-CoV-2 variants. Finally, we explore the impacts of computationally generated RBD structures on PTL-based machine learning, including deep learning, and predictions of deep mutational scanning datasets for the SARS-CoV-2 Omicron BA.2 variant. Our results indicate that PTLs have advantages over persistent homology in analyzing protein structural changes and provide a powerful new TDA tool for data science. 
<a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2301.10865v2-abstract-full').style.display = 'none'; document.getElementById('2301.10865v2-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2301.04815">arXiv:2301.04815</a> <span> [<a href="https://arxiv.org/pdf/2301.04815">pdf</a>, <a href="https://arxiv.org/format/2301.04815">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Machine-learning Analysis of Opioid Use Disorder Informed by MOR, DOR, KOR, NOR and ZOR-Based Interactome Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Feng%2C+H">Hongsong Feng</a>, <a href="/search/q-bio?searchtype=author&query=Elladki%2C+R">Rana Elladki</a>, <a href="/search/q-bio?searchtype=author&query=Jiang%2C+J">Jian Jiang</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2301.04815v1-abstract-short" style="display: inline;"> Opioid use disorder (OUD) continuously poses major public health challenges and social implications worldwide with dramatic rise of opioid dependence leading to potential abuse. 
Despite that a few pharmacological agents have been approved for OUD treatment, the efficacy of said agents for OUD requires further improvement in order to provide safer and more effective pharmacological and psychosocial… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.04815v1-abstract-full').style.display = 'inline'; document.getElementById('2301.04815v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2301.04815v1-abstract-full" style="display: none;"> Opioid use disorder (OUD) continuously poses major public health challenges and social implications worldwide with dramatic rise of opioid dependence leading to potential abuse. Despite that a few pharmacological agents have been approved for OUD treatment, the efficacy of said agents for OUD requires further improvement in order to provide safer and more effective pharmacological and psychosocial treatments. Preferable therapeutic treatments of OUD rely on the advances in understanding the neurobiological mechanism of opioid dependence. Proteins including mu, delta, kappa, nociceptin, and zeta opioid receptors are the direct targets of opioids. Each receptor has a large protein-protein interaction (PPI) network, that behaves differently when subjected to various treatments, thus increasing the complexity in the drug development process for an effective opioid addiction treatment. The report below analyzes the work by presenting a PPI-network informed machine-learning study of OUD. We have examined more than 500 proteins in the five opioid receptor networks and subsequently collected 74 inhibitor datasets. Machine learning models were constructed by pairing gradient boosting decision tree (GBDT) algorithm with two advanced natural language processing (NLP)-based molecular fingerprints. 
With these models, we systematically carried out evaluations of screening and repurposing potential of drug candidates for four opioid receptors. In addition, absorption, distribution, metabolism, excretion, and toxicity (ADMET) properties were also considered in the screening of potential drug candidates. Our study can be a valuable and promising tool of pharmacological development for OUD treatments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.04815v1-abstract-full').style.display = 'none'; document.getElementById('2301.04815v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.13617">arXiv:2212.13617</a> <span> [<a href="https://arxiv.org/pdf/2212.13617">pdf</a>, <a href="https://arxiv.org/format/2212.13617">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> SVSBI: Sequence-based virtual screening of biomolecular interactions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Shen%2C+L">Li Shen</a>, <a href="/search/q-bio?searchtype=author&query=Feng%2C+H">Hongsong Feng</a>, <a href="/search/q-bio?searchtype=author&query=Qiu%2C+Y">Yuchi Qiu</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.13617v1-abstract-short" style="display: inline;"> 
Virtual screening (VS) is an essential technique for understanding biomolecular interactions, particularly, drug design and discovery. The best-performing VS models depend vitally on three-dimensional (3D) structures, which are not available in general but can be obtained from molecular docking. However, current docking accuracy is relatively low, rendering unreliable VS models. We introduce seque… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.13617v1-abstract-full').style.display = 'inline'; document.getElementById('2212.13617v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.13617v1-abstract-full" style="display: none;"> Virtual screening (VS) is an essential technique for understanding biomolecular interactions, particularly, drug design and discovery. The best-performing VS models depend vitally on three-dimensional (3D) structures, which are not available in general but can be obtained from molecular docking. However, current docking accuracy is relatively low, rendering unreliable VS models. We introduce sequence-based virtual screening (SVS) as a new generation of VS models for modeling biomolecular interactions. The SVS model utilizes advanced natural language processing (NLP) algorithms and optimizes deep $K$-embedding strategies to encode biomolecular interactions without invoking 3D structure-based docking. We demonstrate the state-of-art performance of SVS for four regression datasets involving protein-ligand binding, protein-protein, protein-nucleic acid binding, and ligand inhibition of protein-protein interactions and five classification datasets for the protein-protein interactions in five biological species. SVS has the potential to dramatically change the current practice in drug discovery and protein engineering. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.13617v1-abstract-full').style.display = 'none'; document.getElementById('2212.13617v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.06759">arXiv:2211.06759</a> <span> [<a href="https://arxiv.org/pdf/2211.06759">pdf</a>, <a href="https://arxiv.org/format/2211.06759">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Integrating Transformer and Autoencoder Techniques with Spectral Graph Algorithms for the Prediction of Scarcely Labeled Molecular Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Hayes%2C+N">Nicole Hayes</a>, <a href="/search/q-bio?searchtype=author&query=Merkurjev%2C+E">Ekaterina Merkurjev</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.06759v2-abstract-short" style="display: inline;"> In molecular and biological sciences, experiments are expensive, time-consuming, and often subject to ethical constraints. Consequently, one often faces the challenging task of predicting desirable properties from small data sets or scarcely-labeled data sets. 
Although transfer learning can be advantageous, it requires the existence of a related large data set. This work introduces three graph-bas… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.06759v2-abstract-full').style.display = 'inline'; document.getElementById('2211.06759v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.06759v2-abstract-full" style="display: none;"> In molecular and biological sciences, experiments are expensive, time-consuming, and often subject to ethical constraints. Consequently, one often faces the challenging task of predicting desirable properties from small data sets or scarcely-labeled data sets. Although transfer learning can be advantageous, it requires the existence of a related large data set. This work introduces three graph-based models incorporating Merriman-Bence-Osher (MBO) techniques to tackle this challenge. Specifically, graph-based modifications of the MBO scheme are integrated with state-of-the-art techniques, including a home-made transformer and an autoencoder, in order to deal with scarcely-labeled data sets. In addition, a consensus technique is detailed. The proposed models are validated using five benchmark data sets. We also provide a thorough comparison to other competing methods, such as support vector machines, random forests, and gradient boosting decision trees, which are known for their good performance on small data sets. The performances of various methods are analyzed using residue-similarity (R-S) scores and R-S indices. Extensive computational experiments and theoretical analysis show that the new models perform very well even when as little as 1% of the data set is used as labeled data. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.06759v2-abstract-full').style.display = 'none'; document.getElementById('2211.06759v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.00861">arXiv:2211.00861</a> <span> [<a href="https://arxiv.org/pdf/2211.00861">pdf</a>, <a href="https://arxiv.org/format/2211.00861">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Virtual screening of DrugBank database for hERG blockers using topological Laplacian-assisted AI models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Feng%2C+H">Hongsong Feng</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guowei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.00861v1-abstract-short" style="display: inline;"> The human {\it ether-a-go-go} (hERG) potassium channel (K$_\text{v}11.1$) plays a critical role in mediating cardiac action potential. The blockade of this ion channel can potentially lead fatal disorder and/or long QT syndrome. Many drugs have been withdrawn because of their serious hERG-cardiotoxicity. It is crucial to assess the hERG blockade activity in the early stage of drug discovery. 
We ar… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.00861v1-abstract-full').style.display = 'inline'; document.getElementById('2211.00861v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.00861v1-abstract-full" style="display: none;"> The human {\it ether-a-go-go} (hERG) potassium channel (K$_\text{v}11.1$) plays a critical role in mediating cardiac action potential. The blockade of this ion channel can potentially lead fatal disorder and/or long QT syndrome. Many drugs have been withdrawn because of their serious hERG-cardiotoxicity. It is crucial to assess the hERG blockade activity in the early stage of drug discovery. We are particularly interested in the hERG-cardiotoxicity of compounds collected in the DrugBank database considering that many DrugBank compounds have been approved for therapeutic treatments or have high potential to become drugs. Machine learning-based in silico tools offer a rapid and economical platform to virtually screen DrugBank compounds. We design accurate and robust classifiers for blockers/non-blockers and then build regressors to quantitatively analyze the binding potency of the DrugBank compounds on the hERG channel. Molecular sequences are embedded with two natural language processing (NLP) methods, namely, autoencoder and transformer. Complementary three-dimensional (3D) molecular structures are embedded with two advanced mathematical approaches, i.e., topological Laplacians and algebraic graphs. With our state-of-the-art tools, we reveal that 227 out of the 8641 DrugBank compounds are potential hERG blockers, suggesting serious drug safety problems. Our predictions provide guidance for the further experimental interrogation of DrugBank compounds' hERG-cardiotoxicity. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.00861v1-abstract-full').style.display = 'none'; document.getElementById('2211.00861v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.09485">arXiv:2210.09485</a> <span> [<a href="https://arxiv.org/pdf/2210.09485">pdf</a>, <a href="https://arxiv.org/format/2210.09485">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Emerging dominant SARS-CoV-2 variants </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Chen%2C+J">Jiahui Chen</a>, <a href="/search/q-bio?searchtype=author&query=Wang%2C+R">Rui Wang</a>, <a href="/search/q-bio?searchtype=author&query=Hozumi%2C+Y">Yuta Hozumi</a>, <a href="/search/q-bio?searchtype=author&query=Liu%2C+G">Gengzhuo Liu</a>, <a href="/search/q-bio?searchtype=author&query=Qiu%2C+Y">Yuchi Qiu</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+X">Xiaoqi Wei</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.09485v1-abstract-short" style="display: inline;"> Accurate and reliable forecasting of emerging dominant severe acute respiratory syndrome coronavirus 2 
(SARS-CoV-2) variants enables policymakers and vaccine makers to get prepared for future waves of infections. The last three waves of SARS-CoV-2 infections caused by dominant variants Omicron (BA.1), BA.2, and BA.4/BA.5 were accurately foretold by our artificial intelligence (AI) models built wit… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.09485v1-abstract-full').style.display = 'inline'; document.getElementById('2210.09485v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.09485v1-abstract-full" style="display: none;"> Accurate and reliable forecasting of emerging dominant severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) variants enables policymakers and vaccine makers to get prepared for future waves of infections. The last three waves of SARS-CoV-2 infections caused by dominant variants Omicron (BA.1), BA.2, and BA.4/BA.5 were accurately foretold by our artificial intelligence (AI) models built with biophysics, genotyping of viral genomes, experimental data, algebraic topology, and deep learning. Based on newly available experimental data, we analyzed the impacts of all possible viral spike (S) protein receptor-binding domain (RBD) mutations on the SARS-CoV-2 infectivity. Our analysis sheds light on viral evolutionary mechanisms, i.e., natural selection through infectivity strengthening and antibody resistance. We forecast that BA.2.10.4, BA.2.75, BQ.1.1, and particularly, BA.2.75+R346T, have high potential to become new dominant variants to drive the next surge. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.09485v1-abstract-full').style.display = 'none'; document.getElementById('2210.09485v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.03229">arXiv:2209.03229</a> <span> [<a href="https://arxiv.org/pdf/2209.03229">pdf</a>, <a href="https://arxiv.org/format/2209.03229">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Topological AI forecasting of future dominating viral variants </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.03229v1-abstract-short" style="display: inline;"> The understanding of the mechanisms of SARS-CoV-2 evolution and transmission is one of the greatest challenges of our time. By integrating artificial intelligence (AI), viral genomes isolated from patients, tens of thousands of mutational data, biophysics, bioinformatics, and algebraic topology, the SARS-CoV-2 evolution was revealed to be governed by infectivity-based natural selection. 
Two key mu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.03229v1-abstract-full').style.display = 'inline'; document.getElementById('2209.03229v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.03229v1-abstract-full" style="display: none;"> The understanding of the mechanisms of SARS-CoV-2 evolution and transmission is one of the greatest challenges of our time. By integrating artificial intelligence (AI), viral genomes isolated from patients, tens of thousands of mutational data, biophysics, bioinformatics, and algebraic topology, the SARS-CoV-2 evolution was revealed to be governed by infectivity-based natural selection. Two key mutation sites, L452 and N501 on the viral spike protein receptor-binding domain (RBD), were predicted in summer 2020, long before they occur in prevailing variants Alpha, Beta, Gamma, Delta, Kappa, Theta, Lambda, Mu, and Omicron. Recent studies identified a new mechanism of natural selection: antibody resistance. AI-based forecasting of Omicron's infectivity, vaccine breakthrough, and antibody resistance was later nearly perfectly confirmed by experiments. The replacement of dominant BA.1 by BA.2 in later March was predicted in early February. On May 1, 2022, persistent Laplacian-based AI projected Omicron BA.4 and BA.5 to become the new dominating COVID-19 variants. This prediction became reality in late June. Topological AI models offer accurate prediction of mutational impacts on the efficacy of monoclonal antibodies (mAbs). 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.03229v1-abstract-full').style.display = 'none'; document.getElementById('2209.03229v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 2 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> SIAM 2022 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.10639">arXiv:2205.10639</a> <span> [<a href="https://arxiv.org/pdf/2205.10639">pdf</a>, <a href="https://arxiv.org/ps/2205.10639">ps</a>, <a href="https://arxiv.org/format/2205.10639">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> </div> </div> <p class="title is-5 mathjax"> A Flexible Bayesian Clustering of Dynamic Subpopulations in Neural Spiking Activity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Ganchao Wei</a>, <a href="/search/q-bio?searchtype=author&query=Stevenson%2C+I+H">Ian H. 
Stevenson</a>, <a href="/search/q-bio?searchtype=author&query=Wang%2C+X">Xiaojing Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.10639v4-abstract-short" style="display: inline;"> With advances in neural recording techniques, neuroscientists are now able to record the spiking activity of many hundreds of neurons simultaneously, and new statistical methods are needed to understand the structure of this large-scale neural population activity. Although previous work has tried to summarize neural activity within and between known populations by extracting low-dimensional latent… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.10639v4-abstract-full').style.display = 'inline'; document.getElementById('2205.10639v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.10639v4-abstract-full" style="display: none;"> With advances in neural recording techniques, neuroscientists are now able to record the spiking activity of many hundreds of neurons simultaneously, and new statistical methods are needed to understand the structure of this large-scale neural population activity. Although previous work has tried to summarize neural activity within and between known populations by extracting low-dimensional latent factors, in many cases what determines a unique population may be unclear. Neurons differ in their anatomical location, but also, in their cell types and response properties. To identify populations directly related to neural activity, we develop a clustering method based on a mixture of dynamic Poisson factor analyzers (mixDPFA) model, with the number of clusters and dimension of latent factors for each cluster treated as unknown parameters. 
To analyze the proposed mixDPFA model, we propose a Markov chain Monte Carlo (MCMC) algorithm to efficiently sample its posterior distribution. Validating our proposed MCMC algorithm through simulations, we find that it can accurately recover the unknown parameters and the true clustering in the model, and is insensitive to the initial cluster assignments. We then apply the proposed mixDPFA model to multi-region experimental recordings, where we find that the proposed method can identify novel, reliable clusters of neurons based on their activity, and may, thus, be a useful tool for neural data analysis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.10639v4-abstract-full').style.display = 'none'; document.getElementById('2205.10639v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.00532">arXiv:2205.00532</a> <span> [<a href="https://arxiv.org/pdf/2205.00532">pdf</a>, <a href="https://arxiv.org/format/2205.00532">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Persistent Laplacian projected Omicron BA.4 and BA.5 to become new dominating variants </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Chen%2C+J">Jiahui Chen</a>, <a href="/search/q-bio?searchtype=author&query=Qiu%2C+Y">Yuchi Qiu</a>, <a href="/search/q-bio?searchtype=author&query=Wang%2C+R">Rui Wang</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.00532v1-abstract-short" style="display: inline;"> Due to its high transmissibility, Omicron BA.1 ousted the Delta variant to become a dominating variant in late 2021 and was replaced by more transmissible Omicron BA.2 in March 2022. An important question is which new variants will dominate in the future. Topology-based deep learning models have had tremendous success in forecasting emerging variants in the past. 
However, topology is insensitive t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.00532v1-abstract-full').style.display = 'inline'; document.getElementById('2205.00532v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.00532v1-abstract-full" style="display: none;"> Due to its high transmissibility, Omicron BA.1 ousted the Delta variant to become a dominating variant in late 2021 and was replaced by more transmissible Omicron BA.2 in March 2022. An important question is which new variants will dominate in the future. Topology-based deep learning models have had tremendous success in forecasting emerging variants in the past. However, topology is insensitive to homotopic shape variations in virus-human protein-protein binding, which are crucial to viral evolution and transmission. This challenge is tackled with persistent Laplacian, which is able to capture both the topology and shape of data. Persistent Laplacian-based deep learning models are developed to systematically evaluate variant infectivity. Our comparative analysis of Alpha, Beta, Gamma, Delta, Lambda, Mu, and Omicron BA.1, BA.1.1, BA.2, BA.2.11, BA.2.12.1, BA.3, BA.4, and BA.5 unveils that Omicron BA.2.11, BA.2.12.1, BA.3, BA.4, and BA.5 are more contagious than BA.2. In particular, BA.4 and BA.5 are about 36\% more infectious than BA.2 and are projected to become new dominating variants by natural selection. Moreover, the proposed models outperform the state-of-the-art methods on three major benchmark datasets for mutation-induced protein-protein binding free energy changes. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.00532v1-abstract-full').style.display = 'none'; document.getElementById('2205.00532v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.00507">arXiv:2205.00507</a> <span> [<a href="https://arxiv.org/pdf/2205.00507">pdf</a>, <a href="https://arxiv.org/format/2205.00507">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> </div> </div> <p class="title is-5 mathjax"> Dynamic modeling of spike count data with Conway-Maxwell Poisson variability </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Ganchao Wei</a>, <a href="/search/q-bio?searchtype=author&query=Stevenson%2C+I+H">Ian H. Stevenson</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.00507v2-abstract-short" style="display: inline;"> In many areas of the brain, neural spiking activity covaries with features of the external world, such as sensory stimuli or an animal's movement. Experimental findings suggest that the variability of neural activity changes over time and may provide information about the external world beyond the information provided by the average neural activity. 
To flexibly track time-varying neural response p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.00507v2-abstract-full').style.display = 'inline'; document.getElementById('2205.00507v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.00507v2-abstract-full" style="display: none;"> In many areas of the brain, neural spiking activity covaries with features of the external world, such as sensory stimuli or an animal's movement. Experimental findings suggest that the variability of neural activity changes over time and may provide information about the external world beyond the information provided by the average neural activity. To flexibly track time-varying neural response properties, here we developed a dynamic model with Conway-Maxwell Poisson (CMP) observations. The CMP distribution can flexibly describe firing patterns that are both under- and over-dispersed relative to the Poisson distribution. Here we track parameters of the CMP distribution as they vary over time. Using simulations, we show that a normal approximation can accurately track dynamics in state vectors for both the centering and shape parameters ($λ$ and $ν$). We then fit our model to neural data from neurons in primary visual cortex and "place cells" in the hippocampus. We find that this method out-performs previous dynamic models based on the Poisson distribution. The dynamic CMP model provides a flexible framework for tracking time-varying non-Poisson count data and may also have applications beyond neuroscience. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.00507v2-abstract-full').style.display = 'none'; document.getElementById('2205.00507v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.09471">arXiv:2204.09471</a> <span> [<a href="https://arxiv.org/pdf/2204.09471">pdf</a>, <a href="https://arxiv.org/format/2204.09471">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Mathematical artificial intelligence design of mutation-proof COVID-19 monoclonal antibodies </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Chen%2C+J">Jiahui Chen</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.09471v1-abstract-short" style="display: inline;"> Emerging severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) variants have compromised existing vaccines and posed a grand challenge to coronavirus disease 2019 (COVID-19) prevention, control, and global economic recovery. 
For COVID-19 patients, one of the most effective COVID-19 medications is monoclonal antibody (mAb) therapies. The United States Food and Drug Administration (U.S. FDA)… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.09471v1-abstract-full').style.display = 'inline'; document.getElementById('2204.09471v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.09471v1-abstract-full" style="display: none;"> Emerging severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) variants have compromised existing vaccines and posed a grand challenge to coronavirus disease 2019 (COVID-19) prevention, control, and global economic recovery. For COVID-19 patients, one of the most effective COVID-19 medications is monoclonal antibody (mAb) therapies. The United States Food and Drug Administration (U.S. FDA) has given the emergency use authorization (EUA) to a few mAbs, including those from Regeneron, Eli Lilly, etc. However, they are also undermined by SARS-CoV-2 mutations. It is imperative to develop effective mutation-proof mAbs for treating COVID-19 patients infected by all emerging variants and/or the original SARS-CoV-2. We carry out a deep mutational scanning to present the blueprint of such mAbs using algebraic topology and artificial intelligence (AI). To reduce the risk of clinical trial-related failure, we select five mAbs either with FDA EUA or in clinical trials as our starting point. We demonstrate that topological AI-designed mAbs are effective to variants of concerns and variants of interest designated by the World Health Organization (WHO), as well as the original SARS-CoV-2. Our topological AI methodologies have been validated by tens of thousands of deep mutational data and their predictions have been confirmed by results from tens of experimental laboratories and population-level statistics of genome isolates from hundreds of thousands of patients. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.09471v1-abstract-full').style.display = 'none'; document.getElementById('2204.09471v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.05031">arXiv:2202.05031</a> <span> [<a href="https://arxiv.org/pdf/2202.05031">pdf</a>, <a href="https://arxiv.org/format/2202.05031">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Omicron BA.2 (B.1.1.529.2): high potential to becoming the next dominating variant </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Chen%2C+J">Jiahui Chen</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.05031v1-abstract-short" style="display: inline;"> The Omicron variant of severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) has rapidly replaced the Delta variant as a dominating SARS-CoV-2 variant because of natural selection, which favors the variant with higher infectivity and stronger vaccine breakthrough ability. Omicron has three lineages or subvariants, BA.1 (B.1.1.529.1), BA.2 (B.1.1.529.2), and BA.3 (B.1.1.529.3). 
Among them, B… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.05031v1-abstract-full').style.display = 'inline'; document.getElementById('2202.05031v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.05031v1-abstract-full" style="display: none;"> The Omicron variant of severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) has rapidly replaced the Delta variant as a dominating SARS-CoV-2 variant because of natural selection, which favors the variant with higher infectivity and stronger vaccine breakthrough ability. Omicron has three lineages or subvariants, BA.1 (B.1.1.529.1), BA.2 (B.1.1.529.2), and BA.3 (B.1.1.529.3). Among them, BA.1 is the currently prevailing subvariant. BA.2 shares 32 mutations with BA.1 but has 28 distinct ones. BA.3 shares most of its mutations with BA.1 and BA.2 except for one. BA.2 is found to be able to alarmingly reinfect patients originally infected by Omicron BA.1. An important question is whether BA.2 or BA.3 will become a new dominating "variant of concern". Currently, no experimental data has been reported about BA.2 and BA.3. We construct a novel algebraic topology-based deep learning model trained with tens of thousands of mutational and deep mutational data to systematically evaluate BA.2's and BA.3's infectivity, vaccine breakthrough capability, and antibody resistance. Our comparative analysis of all main variants namely, Alpha, Beta, Gamma, Delta, Lambda, Mu, BA.1, BA.2, and BA.3, unveils that BA.2 is about 1.5 and 4.2 times as contagious as BA.1 and Delta, respectively. It is also 30% and 17-fold more capable than BA.1 and Delta, respectively, to escape current vaccines. Therefore, we project that Omicron BA.2 is on its path to becoming the next dominating variant. 
We forecast that like Omicron BA.1, BA.2 will also seriously compromise most existing mAbs, except for sotrovimab developed by GlaxoSmithKline. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.05031v1-abstract-full').style.display = 'none'; document.getElementById('2202.05031v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2201.00114">arXiv:2201.00114</a> <span> [<a href="https://arxiv.org/pdf/2201.00114">pdf</a>, <a href="https://arxiv.org/format/2201.00114">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Molecular Networks">q-bio.MN</span> </div> </div> <p class="title is-5 mathjax"> Machine learning analysis of cocaine addiction informed by DAT, SERT, and NET-based interactome networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Feng%2C+H">Hongsong Feng</a>, <a href="/search/q-bio?searchtype=author&query=Gao%2C+K">Kaifu Gao</a>, <a href="/search/q-bio?searchtype=author&query=Chen%2C+D">Dong Chen</a>, <a href="/search/q-bio?searchtype=author&query=Robison%2C+A+J">Alfred J Robison</a>, <a href="/search/q-bio?searchtype=author&query=Ellsworth%2C+E">Edmund Ellsworth</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2201.00114v1-abstract-short" style="display: inline;"> Cocaine addiction is a psychosocial disorder 
induced by the chronic use of cocaine and causes a large number of deaths around the world. Despite many decades' effort, no drugs have been approved by the Food and Drug Administration (FDA) for the treatment of cocaine dependence. Cocaine dependence is neurological and involves many interacting proteins in the interactome. Among them, dopamine transpo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.00114v1-abstract-full').style.display = 'inline'; document.getElementById('2201.00114v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2201.00114v1-abstract-full" style="display: none;"> Cocaine addiction is a psychosocial disorder induced by the chronic use of cocaine and causes a large number of deaths around the world. Despite many decades' effort, no drugs have been approved by the Food and Drug Administration (FDA) for the treatment of cocaine dependence. Cocaine dependence is neurological and involves many interacting proteins in the interactome. Among them, dopamine transporter (DAT), serotonin transporter (SERT), and norepinephrine transporter (NET) are three major targets. Each of these targets has a large protein-protein interaction (PPI) network which must be considered in the anti-cocaine addiction drug discovery. This work presents DAT, SERT, and NET interactome network-informed machine learning/deep learning (ML/DL) studies of cocaine addiction. We collect and analyze 61 protein targets out of 460 proteins in the DAT, SERT, and NET PPI networks that have sufficient existing inhibitor datasets. Utilizing autoencoder and other ML algorithms, we build ML/DL models for these targets with 115,407 inhibitors to predict drug repurposing potentials and possible side effects. We further screen their absorption, distribution, metabolism, excretion, and toxicity (ADMET) properties to search for nearly optimal leads for anti-cocaine addiction. 
Our approach sets up a systematic protocol for artificial intelligence (AI)-based anti-cocaine addiction lead discovery. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.00114v1-abstract-full').style.display = 'none'; document.getElementById('2201.00114v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.01318">arXiv:2112.01318</a> <span> [<a href="https://arxiv.org/pdf/2112.01318">pdf</a>, <a href="https://arxiv.org/format/2112.01318">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Omicron (B.1.1.529): Infectivity, vaccine breakthrough, and antibody resistance </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Chen%2C+J">Jiahui Chen</a>, <a href="/search/q-bio?searchtype=author&query=Wang%2C+R">Rui Wang</a>, <a href="/search/q-bio?searchtype=author&query=Gilby%2C+N+B">Nancy Benovich Gilby</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.01318v1-abstract-short" style="display: inline;"> The latest severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) variant Omicron (B.1.1.529) has ushered panic responses around the world due to its contagious and vaccine escape mutations. 
The essential infectivity and antibody resistance of the SARS-CoV-2 variant are determined by its mutations on the spike (S) protein receptor-binding domain (RBD). However, a complete experimental evalua… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.01318v1-abstract-full').style.display = 'inline'; document.getElementById('2112.01318v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.01318v1-abstract-full" style="display: none;"> The latest severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) variant Omicron (B.1.1.529) has ushered panic responses around the world due to its contagious and vaccine escape mutations. The essential infectivity and antibody resistance of the SARS-CoV-2 variant are determined by its mutations on the spike (S) protein receptor-binding domain (RBD). However, a complete experimental evaluation of Omicron might take weeks or even months. Here, we present a comprehensive quantitative analysis of Omicron's infectivity, vaccine-breakthrough, and antibody resistance. An artificial intelligence (AI) model, which has been trained with tens of thousands of experimental data points and extensively validated by experimental data on SARS-CoV-2, reveals that Omicron may be over ten times more contagious than the original virus or about twice as infectious as the Delta variant. Based on 132 three-dimensional (3D) structures of antibody-RBD complexes, we unveil that Omicron may be twice more likely to escape current vaccines than the Delta variant. The Food and Drug Administration (FDA)-approved monoclonal antibodies (mAbs) from Eli Lilly may be seriously compromised. Omicron may also diminish the efficacy of mAbs from Celltrion and Rockefeller University. However, its impact on Regeneron mAb cocktail appears to be mild. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.01318v1-abstract-full').style.display = 'none'; document.getElementById('2112.01318v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.04626">arXiv:2110.04626</a> <span> [<a href="https://arxiv.org/pdf/2110.04626">pdf</a>, <a href="https://arxiv.org/format/2110.04626">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> </div> </div> <p class="title is-5 mathjax"> The evolution of the mechanisms of SARS-CoV-2 evolution revealing vaccine-resistant mutations in Europe and America </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Wang%2C+R">Rui Wang</a>, <a href="/search/q-bio?searchtype=author&query=Chen%2C+J">Jiahui Chen</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.04626v1-abstract-short" style="display: inline;"> The importance of understanding SARS-CoV-2 evolution cannot be overemphasized. Recent studies confirm that natural selection is the dominating mechanism of SARS-CoV-2 evolution, which favors mutations that strengthen viral infectivity. We demonstrate that vaccine-breakthrough or antibody-resistant mutations provide a new mechanism of viral evolution. 
Specifically, vaccine-resistant mutation Y449S… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.04626v1-abstract-full').style.display = 'inline'; document.getElementById('2110.04626v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.04626v1-abstract-full" style="display: none;"> The importance of understanding SARS-CoV-2 evolution cannot be overemphasized. Recent studies confirm that natural selection is the dominating mechanism of SARS-CoV-2 evolution, which favors mutations that strengthen viral infectivity. We demonstrate that vaccine-breakthrough or antibody-resistant mutations provide a new mechanism of viral evolution. Specifically, vaccine-resistant mutation Y449S in the spike (S) protein receptor-binding domain (RBD), which occurred in co-mutation [Y449S, N501Y], has reduced infectivity compared to the original SARS-CoV-2 but can disrupt existing antibodies that neutralize the virus. By tracing the evolutionary trajectories of vaccine-resistant mutations in over 1.9 million SARS-CoV-2 genomes, we reveal that the occurrence and frequency of vaccine-resistant mutations correlate strongly with the vaccination rates in Europe and America. We anticipate that as a complementary transmission pathway, vaccine-resistant mutations will become a dominating mechanism of SARS-CoV-2 evolution when most of the world's population is vaccinated. Our study sheds light on SARS-CoV-2 evolution and transmission and enables the design of the next-generation mutation-proof vaccines and antibody drugs. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.04626v1-abstract-full').style.display = 'none'; document.getElementById('2110.04626v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.12100">arXiv:2109.12100</a> <span> [<a href="https://arxiv.org/pdf/2109.12100">pdf</a>, <a href="https://arxiv.org/format/2109.12100">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1063/1674-0068/cjcp2109150">10.1063/1674-0068/cjcp2109150 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> MLIMC: Machine learning-based implicit-solvent Monte Carlo </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Chen%2C+J">Jiahui Chen</a>, <a href="/search/q-bio?searchtype=author&query=Geng%2C+W">Weihua Geng</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.12100v1-abstract-short" style="display: inline;"> Monte Carlo (MC) methods are important computational tools for molecular structure optimizations and predictions. When solvent effects are explicitly considered, MC methods become very expensive due to the large degree of freedom associated with the water molecules and mobile ions. Alternatively implicit-solvent MC can largely reduce the computational cost by applying a mean field approximation to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.12100v1-abstract-full').style.display = 'inline'; document.getElementById('2109.12100v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.12100v1-abstract-full" style="display: none;"> Monte Carlo (MC) methods are important computational tools for molecular structure optimizations and predictions. When solvent effects are explicitly considered, MC methods become very expensive due to the large degree of freedom associated with the water molecules and mobile ions. Alternatively implicit-solvent MC can largely reduce the computational cost by applying a mean field approximation to solvent effects and meanwhile maintains the atomic detail of the target molecule. The two most popular implicit-solvent models are the Poisson-Boltzmann (PB) model and the Generalized Born (GB) model in a way such that the GB model is an approximation to the PB model but is much faster in simulation time. In this work, we develop a machine learning-based implicit-solvent Monte Carlo (MLIMC) method by combining the advantages of both implicit solvent models in accuracy and efficiency. Specifically, the MLIMC method uses a fast and accurate PB-based machine learning (PBML) scheme to compute the electrostatic solvation free energy at each step. 
We validate our MLIMC method by using a benzene-water system and a protein-water system. We show that the proposed MLIMC method has great advantages in speed and accuracy for molecular structure optimization and prediction. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.12100v1-abstract-full').style.display = 'none'; document.getElementById('2109.12100v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.08718">arXiv:2109.08718</a> <span> [<a href="https://arxiv.org/pdf/2109.08718">pdf</a>, <a href="https://arxiv.org/format/2109.08718">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Molecular Networks">q-bio.MN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Proteome-informed machine learning studies of cocaine addiction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Gao%2C+K">Kaifu Gao</a>, <a href="/search/q-bio?searchtype=author&query=Chen%2C+D">Dong Chen</a>, <a href="/search/q-bio?searchtype=author&query=Robison%2C+A+J">Alfred J Robison</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.08718v1-abstract-short" style="display: inline;"> Cocaine addiction accounts for a large portion of substance use 
disorders and threatens millions of lives worldwide. There is an urgent need to come up with efficient anti-cocaine addiction drugs. Unfortunately, no medications have been approved by the Food and Drug Administration (FDA), despite the extensive effort in the past few decades. The main challenge is the intricate molecular mechanisms… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.08718v1-abstract-full').style.display = 'inline'; document.getElementById('2109.08718v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.08718v1-abstract-full" style="display: none;"> Cocaine addiction accounts for a large portion of substance use disorders and threatens millions of lives worldwide. There is an urgent need to come up with efficient anti-cocaine addiction drugs. Unfortunately, no medications have been approved by the Food and Drug Administration (FDA), despite the extensive effort in the past few decades. The main challenge is the intricate molecular mechanisms of cocaine addiction, involving synergistic interactions among proteins upstream and downstream of dopamine transporter (DAT) functions impacted by cocaine. However, traditional in vivo or in vitro experiments can not address the roles of so many proteins, highlighting the need for innovative strategies in the field. We propose a proteome-informed machine learning/deep learning (ML/DL) platform to discover nearly optimal anti-cocaine addiction lead compounds. We construct and analyze proteomic protein-protein interaction (PPI) networks for cocaine dependence to identify 141 involved drug targets and represent over 60,000 associated drug candidates or experimental drugs in the latent space using an autoencoder (EA) model trained from over 104 million molecules. We build 32 ML models for cross-target analysis of these drug candidates for side effects and repurposing potential. 
We further screen the absorption, distribution, metabolism, excretion, and toxicity (ADMET) properties of these candidates. Our platform reveals that essentially all of the existing drug candidates, including dozens of experimental drugs, fail to pass our cross-target and ADMET screenings. Nonetheless, we have identified two nearly optimal leads for further optimization. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.08718v1-abstract-full').style.display = 'none'; document.getElementById('2109.08718v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.08148">arXiv:2109.08148</a> <span> [<a href="https://arxiv.org/pdf/2109.08148">pdf</a>, <a href="https://arxiv.org/format/2109.08148">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Review of the mechanisms of SARS-CoV-2 evolution and transmission </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Chen%2C+J">Jiahui Chen</a>, <a href="/search/q-bio?searchtype=author&query=Wang%2C+R">Rui Wang</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.08148v1-abstract-short" style="display: 
inline;"> The mechanism of SARS-CoV-2 evolution and transmission is elusive and its understanding, a prerequisite to forecast emerging variants, is of paramount importance. SARS-CoV-2 evolution is driven by the mechanisms at molecular and organism scales and regulated by the transmission pathways at the population scale. In this review, we show that infectivity-based natural selection was discovered as the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.08148v1-abstract-full').style.display = 'inline'; document.getElementById('2109.08148v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.08148v1-abstract-full" style="display: none;"> The mechanism of SARS-CoV-2 evolution and transmission is elusive and its understanding, a prerequisite to forecast emerging variants, is of paramount importance. SARS-CoV-2 evolution is driven by the mechanisms at molecular and organism scales and regulated by the transmission pathways at the population scale. In this review, we show that infectivity-based natural selection was discovered as the mechanism for SARS-CoV-2 evolution and transmission in July 2020. In April 2021, we proved beyond all doubt that such a natural selection via infectivity-based transmission pathway remained the sole mechanism for SARS-CoV-2 evolution. However, we reveal that antibody-disruptive co-mutations [Y449S, N501Y] debuted as a new vaccine-resistant transmission pathway of viral evolution in highly vaccinated populations a few months ago. Over one year ago, we foresaw that mutations on spike protein RBD residues, 452 and 501, would "have high chances to mutate into significantly more infectious COVID-19 strains". Mutations on these residues underpin prevailing SARS-CoV-2 variants Alpha, Beta, Gamma, Delta, Epsilon, Theta, Kappa, Lambda, and Mu at present and are expected to be vital to emerging variants. 
We anticipate that viral evolution will combine RBD co-mutations at these two sites, creating future variants that are tens of times more infectious than the original SARS-CoV-2. Additionally, two complementary transmission pathways of viral evolution: infectivity and vaccine-resistant, will prolong our battle with COVID-19 for years. We predict that RBD co-mutation [A411S, L452R, T478K], [L452R, T478K, N501Y], [L452R, T478K, E484K, N501Y], [K417N, L452R, T478K], and [P384L, K417N, E484K, N501Y] will have high chances to grow into dominating variants due to their high infectivity and/or strong ability to break through current vaccines, calling for the development of new vaccines and antibody therapies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.08148v1-abstract-full').style.display = 'none'; document.getElementById('2109.08148v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 10 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.04509">arXiv:2109.04509</a> <span> [<a href="https://arxiv.org/pdf/2109.04509">pdf</a>, <a href="https://arxiv.org/format/2109.04509">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> </div> </div> <p class="title is-5 mathjax"> Emerging vaccine-breakthrough SARS-CoV-2 variants </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Wang%2C+R">Rui Wang</a>, <a href="/search/q-bio?searchtype=author&query=Chen%2C+J">Jiahui Chen</a>, <a href="/search/q-bio?searchtype=author&query=Hozumi%2C+Y">Yuta Hozumi</a>, <a href="/search/q-bio?searchtype=author&query=Yin%2C+C">Changchuan Yin</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.04509v1-abstract-short" style="display: inline;"> The recent global surge in COVID-19 infections has been fueled by new SARS-CoV-2 variants, namely Alpha, Beta, Gamma, Delta, etc. The molecular mechanism underlying such surge is elusive due to 4,653 non-degenerate mutations on the spike protein, which is the target of most COVID-19 vaccines. 
The understanding of the molecular mechanism of transmission and evolution is a prerequisite to foresee th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.04509v1-abstract-full').style.display = 'inline'; document.getElementById('2109.04509v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.04509v1-abstract-full" style="display: none;"> The recent global surge in COVID-19 infections has been fueled by new SARS-CoV-2 variants, namely Alpha, Beta, Gamma, Delta, etc. The molecular mechanism underlying such surge is elusive due to 4,653 non-degenerate mutations on the spike protein, which is the target of most COVID-19 vaccines. The understanding of the molecular mechanism of transmission and evolution is a prerequisite to foresee the trend of emerging vaccine-breakthrough variants and the design of mutation-proof vaccines and monoclonal antibodies. We integrate the genotyping of 1,489,884 SARS-CoV-2 genomes isolates, 130 human antibodies, tens of thousands of mutational data points, topological data analysis, and deep learning to reveal SARS-CoV-2 evolution mechanism and forecast emerging vaccine-escape variants. We show that infectivity-strengthening and antibody-disruptive co-mutations on the S protein RBD can quantitatively explain the infectivity and virulence of all prevailing variants. We demonstrate that Lambda is as infectious as Delta but is more vaccine-resistant. We analyze emerging vaccine-breakthrough co-mutations in 20 countries, including the United Kingdom, the United States, Denmark, Brazil, and Germany, etc. We envision that natural selection through infectivity will continue to be the main mechanism for viral evolution among unvaccinated populations, while antibody disruptive co-mutations will fuel the future growth of vaccine-breakthrough variants among fully vaccinated populations. 
Finally, we have identified the co-mutations that have the great likelihood of becoming dominant: [A411S, L452R, T478K], [L452R, T478K, N501Y], [V401L, L452R, T478K], [K417N, L452R, T478K], [L452R, T478K, E484K, N501Y], and [P384L, K417N, E484K, N501Y]. We predict they, particularly the last four, will break through existing vaccines. We foresee an urgent need to develop new vaccines that target these co-mutations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.04509v1-abstract-full').style.display = 'none'; document.getElementById('2109.04509v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.08023">arXiv:2103.08023</a> <span> [<a href="https://arxiv.org/pdf/2103.08023">pdf</a>, <a href="https://arxiv.org/format/2103.08023">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Vaccine-escape and fast-growing mutations in the United Kingdom, the United States, Singapore, Spain, South Africa, and other COVID-19-devastated countries </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Wang%2C+R">Rui Wang</a>, <a 
href="/search/q-bio?searchtype=author&query=Chen%2C+J">Jiahui Chen</a>, <a href="/search/q-bio?searchtype=author&query=Gao%2C+K">Kaifu Gao</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.08023v2-abstract-short" style="display: inline;"> Recently, the SARS-CoV-2 variants from the United Kingdom (UK), South Africa, and Brazil have received much attention for their increased infectivity, potentially high virulence, and possible threats to existing vaccines and antibody therapies. The question remains if there are other more infectious variants transmitted around the world. We carry out a large-scale study of 252,874 SARS-CoV-2 genom… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.08023v2-abstract-full').style.display = 'inline'; document.getElementById('2103.08023v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.08023v2-abstract-full" style="display: none;"> Recently, the SARS-CoV-2 variants from the United Kingdom (UK), South Africa, and Brazil have received much attention for their increased infectivity, potentially high virulence, and possible threats to existing vaccines and antibody therapies. The question remains if there are other more infectious variants transmitted around the world. We carry out a large-scale study of 252,874 SARS-CoV-2 genome isolates from patients to identify many other rapidly growing mutations on the spike (S) protein receptor-binding domain (RBD). We reveal that 88 out of 95 significant mutations that were observed more than 10 times strengthen the binding between the RBD and the host angiotensin-converting enzyme 2 (ACE2), indicating the virus evolves toward more infectious variants. 
In particular, we discover new fast-growing RBD mutations N439K, L452R, S477N, S477R, and N501T that also enhance the RBD and ACE2 binding. We further unveil that mutation N501Y involved in United Kingdom (UK), South Africa, and Brazil variants may moderately weaken the binding between the RBD and many known antibodies, while mutations E484K and K417N found in South Africa and Brazilian variants can potentially disrupt the binding between the RBD and many known antibodies. Among three newly identified fast-growing RBD mutations, L452R, which is now known as part of the California variant B.1.427, and N501T are able to effectively weaken the binding of many known antibodies with the RBD. Finally, we hypothesize that RBD mutations that can simultaneously make SARS-CoV-2 more infectious and disrupt the existing antibodies, called vaccine escape mutations, will pose an imminent threat to the current crop of vaccines. A list of most likely vaccine escape mutations is given, including N501Y, L452R, E484K, N501T, S494P, and K417N. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.08023v2-abstract-full').style.display = 'none'; document.getElementById('2103.08023v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages, 13 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2102.01803">arXiv:2102.01803</a> <span> [<a href="https://arxiv.org/pdf/2102.01803">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation">stat.CO</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1162/neco_a_01426">10.1162/neco_a_01426 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Tracking fast and slow changes in synaptic weights from simultaneously observed pre- and postsynaptic spiking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Ganchao Wei</a>, <a href="/search/q-bio?searchtype=author&query=Stevenson%2C+I+H">Ian H. Stevenson</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2102.01803v2-abstract-short" style="display: inline;"> Synapses change on multiple timescales, ranging from milliseconds to minutes, due to a combination of both short- and long-term plasticity. 
Here we develop an extension of the common Generalized Linear Model to infer both short- and long-term changes in the coupling between a pre- and post-synaptic neuron based on observed spiking activity. We model short-term synaptic plasticity using additive ef… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.01803v2-abstract-full').style.display = 'inline'; document.getElementById('2102.01803v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2102.01803v2-abstract-full" style="display: none;"> Synapses change on multiple timescales, ranging from milliseconds to minutes, due to a combination of both short- and long-term plasticity. Here we develop an extension of the common Generalized Linear Model to infer both short- and long-term changes in the coupling between a pre- and post-synaptic neuron based on observed spiking activity. We model short-term synaptic plasticity using additive effects that depend on the presynaptic spike timing, and we model long-term changes in both synaptic weight and baseline firing rate using point process adaptive smoothing. Using simulations, we first show that this model can accurately recover time-varying synaptic weights 1) for both depressing and facilitating synapses, 2) with a variety of long-term changes (including realistic changes, such as due to STDP), 3) with a range of pre- and post-synaptic firing rates, and 4) for both excitatory and inhibitory synapses. We then apply our model to two experimentally recorded putative synaptic connections. We find that simultaneously tracking fast changes in synaptic weights, slow changes in synaptic weights, and unexplained variations in baseline firing is essential. Omitting any one of these factors can lead to spurious inferences for the others. Altogether, this model provides a flexible framework for tracking short- and long-term variation in spike transmission. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.01803v2-abstract-full').style.display = 'none'; document.getElementById('2102.01803v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 February, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Neural Computation (2021) 33 (10) 2682-2709 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2102.00971">arXiv:2102.00971</a> <span> [<a href="https://arxiv.org/pdf/2102.00971">pdf</a>, <a href="https://arxiv.org/format/2102.00971">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Methodology-centered review of molecular modeling, simulation, and prediction of SARS-CoV-2 </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Gao%2C+K">Kaifu Gao</a>, <a href="/search/q-bio?searchtype=author&query=Wang%2C+R">Rui Wang</a>, <a href="/search/q-bio?searchtype=author&query=Chen%2C+J">Jiahui Chen</a>, <a href="/search/q-bio?searchtype=author&query=Cheng%2C+L">Limei Cheng</a>, <a href="/search/q-bio?searchtype=author&query=Frishcosy%2C+J">Jaclyn Frishcosy</a>, <a href="/search/q-bio?searchtype=author&query=Huzumi%2C+Y">Yuta Huzumi</a>, <a href="/search/q-bio?searchtype=author&query=Qiu%2C+Y">Yuchi Qiu</a>, <a href="/search/q-bio?searchtype=author&query=Schluckbier%2C+T">Tom Schluckbier</a>, <a 
href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2102.00971v1-abstract-short" style="display: inline;"> The deadly coronavirus disease 2019 (COVID-19) pandemic caused by severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) has gone out of control globally. Despite much effort by scientists, medical experts, and society in general, the slow progress on drug discovery and antibody therapeutic development, the unknown possible side effects of the existing vaccines, and the high transmission rat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.00971v1-abstract-full').style.display = 'inline'; document.getElementById('2102.00971v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2102.00971v1-abstract-full" style="display: none;"> The deadly coronavirus disease 2019 (COVID-19) pandemic caused by severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) has gone out of control globally. Despite much effort by scientists, medical experts, and society in general, the slow progress on drug discovery and antibody therapeutic development, the unknown possible side effects of the existing vaccines, and the high transmission rate of the SARS-CoV-2, remind us of the sad reality that our current understanding of the transmission, infectivity, and evolution of SARS-CoV-2 is unfortunately very limited. The major limitation is the lack of mechanistic understanding of viral-host cell interactions, the viral regulation, protein-protein interactions, including antibody-antigen binding, protein-drug binding, host immune response, etc. 
This limitation will likely haunt the scientific community for a long time and have a devastating consequence in combating COVID-19 and other pathogens. Notably, compared to the long-cycle, highly cost, and safety-demanding molecular-level experiments, the theoretical and computational studies are economical, speedy, and easy to perform. There exists a tsunami of the literature on molecular modeling, simulation, and prediction of SARS-CoV-2 that has become impossible to fully be covered in a review. To provide the reader a quick update about the status of molecular modeling, simulation, and prediction of SARS-CoV-2, we present a comprehensive and systematic methodology-centered narrative in the nick of time. Aspects such as molecular modeling, Monte Carlo (MC) methods, structural bioinformatics, machine learning, deep learning, and mathematical approaches are included in this review. This review will be beneficial to researchers who are looking for ways to contribute to SARS-CoV-2 studies and those who are assessing the current status in the field. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.00971v1-abstract-full').style.display = 'none'; document.getElementById('2102.00971v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 February, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2021. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">99 pages, 17 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2012.15268">arXiv:2012.15268</a> <span> [<a href="https://arxiv.org/pdf/2012.15268">pdf</a>, <a href="https://arxiv.org/format/2012.15268">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> UMAP-assisted $K$-means clustering of large-scale SARS-CoV-2 mutation datasets </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Hozumi%2C+Y">Yuta Hozumi</a>, <a href="/search/q-bio?searchtype=author&query=Wang%2C+R">Rui Wang</a>, <a href="/search/q-bio?searchtype=author&query=Yin%2C+C">Changchuan Yin</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2012.15268v1-abstract-short" style="display: inline;"> Coronavirus disease 2019 (COVID-19) caused by severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) has a worldwide devastating effect. The understanding of evolution and transmission of SARS-CoV-2 is of paramount importance for the COVID-19 control, combating, and prevention. 
Due to the rapid growth of both the number of SARS-CoV-2 genome sequences and the number of unique mutations, the p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2012.15268v1-abstract-full').style.display = 'inline'; document.getElementById('2012.15268v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2012.15268v1-abstract-full" style="display: none;"> Coronavirus disease 2019 (COVID-19) caused by severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) has a worldwide devastating effect. The understanding of evolution and transmission of SARS-CoV-2 is of paramount importance for the COVID-19 control, combating, and prevention. Due to the rapid growth of both the number of SARS-CoV-2 genome sequences and the number of unique mutations, the phylogenetic analysis of SARS-CoV-2 genome isolates faces an emergent large-data challenge. We introduce a dimension-reduced $k$-means clustering strategy to tackle this challenge. We examine the performance and effectiveness of three dimension-reduction algorithms: principal component analysis (PCA), t-distributed stochastic neighbor embedding (t-SNE), and uniform manifold approximation and projection (UMAP). By using four benchmark datasets, we found that UMAP is the best-suited technique due to its stable, reliable, and efficient performance, its ability to improve clustering accuracy, especially for large Jaccard distanced-based datasets, and its superior clustering visualization. The UMAP-assisted $k$-means clustering enables us to shed light on increasingly large datasets from SARS-CoV-2 genome isolates. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2012.15268v1-abstract-full').style.display = 'none'; document.getElementById('2012.15268v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 December, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">30 pages, 10 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.06357">arXiv:2010.06357</a> <span> [<a href="https://arxiv.org/pdf/2010.06357">pdf</a>, <a href="https://arxiv.org/format/2010.06357">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Prediction and mitigation of mutation threats to COVID-19 vaccines and antibody therapies </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Chen%2C+J">Jiahui Chen</a>, <a href="/search/q-bio?searchtype=author&query=Gao%2C+K">Kaifu Gao</a>, <a href="/search/q-bio?searchtype=author&query=Wang%2C+R">Rui Wang</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guowei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.06357v2-abstract-short" style="display: inline;"> Antibody therapeutics and vaccines are among our last resort to end the raging COVID-19 pandemic. 
They, however, are prone to over 5,000 mutations on the spike (S) protein uncovered by a Mutation Tracker based on over 200,000 genome isolates. It is imperative to understand how mutations would impact vaccines and antibodies in the development. In this work, we study the mechanism, frequency, and ra… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.06357v2-abstract-full').style.display = 'inline'; document.getElementById('2010.06357v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.06357v2-abstract-full" style="display: none;"> Antibody therapeutics and vaccines are among our last resort to end the raging COVID-19 pandemic. They, however, are prone to over 5,000 mutations on the spike (S) protein uncovered by a Mutation Tracker based on over 200,000 genome isolates. It is imperative to understand how mutations would impact vaccines and antibodies in the development. In this work, we study the mechanism, frequency, and ratio of mutations on the S protein. Additionally, we use 56 antibody structures and analyze their 2D and 3D characteristics. Moreover, we predict the mutation-induced binding free energy (BFE) changes for the complexes of S protein and antibodies or ACE2. By integrating genetics, biophysics, deep learning, and algebraic topology, we reveal that most of 462 mutations on the receptor-binding domain (RBD) will weaken the binding of S protein and antibodies and disrupt the efficacy and reliability of antibody therapies and vaccines. A list of 31 vaccine escape mutants is identified, while many other disruptive mutations are detailed as well. We also unveil that about 65\% existing RBD mutations, including those variants recently found in the United Kingdom (UK) and South Africa, are binding-strengthen mutations, resulting in more infectious COVID-19 variants. 
We discover the disparity between the extreme values of RBD mutation-induced BFE strengthening and weakening of the bindings with antibodies and ACE2, suggesting that SARS-CoV-2 is at an advanced stage of evolution for human infection, while the human immune system is able to produce optimized antibodies. This discovery implies the vulnerability of current vaccines and antibody drugs to new mutations. Our predictions were validated by comparison with more than 1,400 deep mutations on the S protein RBD. Our results show the urgent need to develop new mutation-resistant vaccines and antibodies and to prepare for seasonal vaccinations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.06357v2-abstract-full').style.display = 'none'; document.getElementById('2010.06357v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">28 pages, 17 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2008.07488">arXiv:2008.07488</a> <span> [<a href="https://arxiv.org/pdf/2008.07488">pdf</a>, <a href="https://arxiv.org/format/2008.07488">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> </div> </div> <p class="title is-5 mathjax"> Host immune response driving SARS-CoV-2 evolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Wang%2C+R">Rui Wang</a>, <a href="/search/q-bio?searchtype=author&query=Hozumi%2C+Y">Yuta Hozumi</a>, <a href="/search/q-bio?searchtype=author&query=Zheng%2C+Y">Yong-Hui Zheng</a>, <a href="/search/q-bio?searchtype=author&query=Yin%2C+C">Changchuan Yin</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2008.07488v2-abstract-short" style="display: inline;"> The transmission and evolution of severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) are of paramount importance to the controlling and combating of coronavirus disease 2019 (COVID-19) pandemic. Currently, near 15,000 SARS-CoV-2 single mutations have been recorded, having a great ramification to the development of diagnostics, vaccines, antibody therapies, and drugs. 
However, little is k… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2008.07488v2-abstract-full').style.display = 'inline'; document.getElementById('2008.07488v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2008.07488v2-abstract-full" style="display: none;"> The transmission and evolution of severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) are of paramount importance to the controlling and combating of coronavirus disease 2019 (COVID-19) pandemic. Currently, near 15,000 SARS-CoV-2 single mutations have been recorded, having a great ramification to the development of diagnostics, vaccines, antibody therapies, and drugs. However, little is known about SARS-CoV-2 evolutionary characteristics and general trend. In this work, we present a comprehensive genotyping analysis of existing SARS-CoV-2 mutations. We reveal that host immune response via APOBEC and ADAR gene editing gives rise to near 65\% of recorded mutations. Additionally, we show that children under age five and the elderly may be at high risk from COVID-19 because of their overreacting to the viral infection. Moreover, we uncover that populations of Oceania and Africa react significantly more intensively to SARS-CoV-2 infection than those of Europe and Asia, which may explain why African Americans were shown to be at increased risk of dying from COVID-19, in addition to their high risk of getting sick from COVID-19 caused by systemic health and social inequities. Finally, our study indicates that for two viral genome sequences of the same origin, their evolution order may be determined from the ratio of mutation type C$>$T over T$>$C. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2008.07488v2-abstract-full').style.display = 'none'; document.getElementById('2008.07488v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 August, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 August, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">22 pages, 15 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2007.12692">arXiv:2007.12692</a> <span> [<a href="https://arxiv.org/pdf/2007.12692">pdf</a>, <a href="https://arxiv.org/format/2007.12692">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> </div> </div> <p class="title is-5 mathjax"> Characterizing SARS-CoV-2 mutations in the United States </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Wang%2C+R">Rui Wang</a>, <a href="/search/q-bio?searchtype=author&query=Chen%2C+J">Jiahui Chen</a>, <a href="/search/q-bio?searchtype=author&query=Gao%2C+K">Kaifu Gao</a>, <a href="/search/q-bio?searchtype=author&query=Hozumi%2C+Y">Yuta Hozumi</a>, <a href="/search/q-bio?searchtype=author&query=Yin%2C+C">Changchuan Yin</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: 
<span class="abstract-short has-text-grey-dark mathjax" id="2007.12692v1-abstract-short" style="display: inline;"> The severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) has been mutating since it was first sequenced in early January 2020. The genetic variants have developed into a few distinct clusters with different properties. Since the United States (US) has the highest number of viral infected patients globally, it is essential to understand the US SARS-CoV-2. Using genotyping, sequence-alignmen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.12692v1-abstract-full').style.display = 'inline'; document.getElementById('2007.12692v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2007.12692v1-abstract-full" style="display: none;"> The severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) has been mutating since it was first sequenced in early January 2020. The genetic variants have developed into a few distinct clusters with different properties. Since the United States (US) has the highest number of viral infected patients globally, it is essential to understand the US SARS-CoV-2. Using genotyping, sequence-alignment, time-evolution, $k$-means clustering, protein-folding stability, algebraic topology, and network theory, we reveal that the US SARS-CoV-2 has four substrains and five top US SARS-CoV-2 mutations were first detected in China (2 cases), Singapore (2 cases), and the United Kingdom (1 case). The next three top US SARS-CoV-2 mutations were first detected in the US. These eight top mutations belong to two disconnected groups. The first group consisting of 5 concurrent mutations is prevailing, while the other group with three concurrent mutations gradually fades out. Our analysis suggests that female immune systems are more active than those of males in responding to SARS-CoV-2 infections. 
We identify that one of the top mutations, 27964C$>$T-(S24L) on ORF8, has an unusually strong gender dependence. Based on the analysis of all mutations on the spike protein, we further uncover that three of four US SARS-CoV-2 substrains become more infectious. Our study calls for effective viral control and containing strategies in the US. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.12692v1-abstract-full').style.display = 'none'; document.getElementById('2007.12692v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 July, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">31 pages, 20 figures, and 4 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2007.01344">arXiv:2007.01344</a> <span> [<a href="https://arxiv.org/pdf/2007.01344">pdf</a>, <a href="https://arxiv.org/format/2007.01344">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Decoding asymptomatic COVID-19 infection and transmission </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Wang%2C+R">Rui Wang</a>, <a href="/search/q-bio?searchtype=author&query=Hozumi%2C+Y">Yuta Hozumi</a>, <a href="/search/q-bio?searchtype=author&query=Yin%2C+C">Changchuan Yin</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei 
Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2007.01344v1-abstract-short" style="display: inline;"> Coronavirus disease 2019 (COVID-19) is a continuously devastating public health and the world economy. One of the major challenges in controlling the COVID-19 outbreak is its asymptomatic infection and transmission, which are elusive and defenseless in most situations. The pathogenicity and virulence of asymptomatic COVID-19 remain mysterious. Based on the genotyping of 20656 Severe Acute Respirat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.01344v1-abstract-full').style.display = 'inline'; document.getElementById('2007.01344v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2007.01344v1-abstract-full" style="display: none;"> Coronavirus disease 2019 (COVID-19) is a continuously devastating public health and the world economy. One of the major challenges in controlling the COVID-19 outbreak is its asymptomatic infection and transmission, which are elusive and defenseless in most situations. The pathogenicity and virulence of asymptomatic COVID-19 remain mysterious. Based on the genotyping of 20656 Severe Acute Respiratory Syndrome Coronavirus 2 (SARS-CoV-2) genome isolates, we reveal that asymptomatic infection is linked to SARS-CoV-2 11083G>T mutation, i.e., leucine (L) to phenylalanine (F) substitution at the residue 37 (L37F) of nonstructure protein 6 (NSP6). By analyzing the distribution of 11083G>T in various countries, we unveil that 11083G>T may correlate with the hypotoxicity of SARS-CoV-2. Moreover, we show a global decaying tendency of the 11083G>T mutation ratio indicating that 11083G>T hinders SARS-CoV-2 transmission capacity. 
Sequence alignment found both NSP6 and residue 37 neighborhoods are relatively conservative over a few coronaviral species, indicating their importance in regulating host cell autophagy to undermine innate cellular defense against viral infection. Using machine learning and topological data analysis, we demonstrate that mutation L37F has made NSP6 energetically less stable. The rigidity and flexibility index and several network models suggest that mutation L37F may have compromised the NSP6 function, leading to a relatively weak SARS-CoV subtype. This assessment is in good agreement with our genotyping of SARS-CoV-2 evolution and transmission across various countries and regions over the past few months. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.01344v1-abstract-full').style.display = 'none'; document.getElementById('2007.01344v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 July, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2020. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2006.10584">arXiv:2006.10584</a> <span> [<a href="https://arxiv.org/pdf/2006.10584">pdf</a>, <a href="https://arxiv.org/format/2006.10584">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Review of COVID-19 Antibody Therapies </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Chen%2C+J">Jiahui Chen</a>, <a href="/search/q-bio?searchtype=author&query=Gao%2C+K">Kaifu Gao</a>, <a href="/search/q-bio?searchtype=author&query=Wang%2C+R">Rui Wang</a>, <a href="/search/q-bio?searchtype=author&query=Nguyen%2C+D+D">Duc Duy Nguyen</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2006.10584v1-abstract-short" style="display: inline;"> Under the global health emergency caused by coronavirus disease 2019 (COVID-19), efficient and specific therapies are urgently needed. Compared with traditional small-molecular drugs, antibody therapies are relatively easy to develop and as specific as vaccines in targeting severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2), and thus attract much attention in the past few months. 
This wo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.10584v1-abstract-full').style.display = 'inline'; document.getElementById('2006.10584v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2006.10584v1-abstract-full" style="display: none;"> Under the global health emergency caused by coronavirus disease 2019 (COVID-19), efficient and specific therapies are urgently needed. Compared with traditional small-molecular drugs, antibody therapies are relatively easy to develop and as specific as vaccines in targeting severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2), and thus attract much attention in the past few months. This work reviews seven existing antibodies for SARS-CoV-2 spike (S) protein with three-dimensional (3D) structures deposited in the Protein Data Bank. Five antibody structures associated with SARS-CoV are evaluated for their potential in neutralizing SARS-CoV-2. The interactions of these antibodies with the S protein receptor-binding domain (RBD) are compared with those of angiotensin-converting enzyme 2 (ACE2) and RBD complexes. Due to the orders of magnitude in the discrepancies of experimental binding affinities, we introduce topological data analysis (TDA), a variety of network models, and deep learning to analyze the binding strength and therapeutic potential of the aforementioned fourteen antibody-antigen complexes. The current COVID-19 antibody clinical trials, which are not limited to the S protein target, are also reviewed. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.10584v1-abstract-full').style.display = 'none'; document.getElementById('2006.10584v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 June, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">30 pages, 10 figures, 5 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2005.14669">arXiv:2005.14669</a> <span> [<a href="https://arxiv.org/pdf/2005.14669">pdf</a>, <a href="https://arxiv.org/format/2005.14669">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Mutations strengthened SARS-CoV-2 infectivity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Chen%2C+J">Jiahui Chen</a>, <a href="/search/q-bio?searchtype=author&query=Wang%2C+R">Rui Wang</a>, <a href="/search/q-bio?searchtype=author&query=Wang%2C+M">Menglun Wang</a>, <a href="/search/q-bio?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2005.14669v1-abstract-short" style="display: inline;"> Severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) infectivity is a major concern in coronavirus disease 2019 (COVID-19) 
prevention and economic reopening. However, rigorous determination of SARS-CoV-2 infectivity is essentially impossible owing to its continuous evolution with over 13752 single nucleotide polymorphisms (SNP) variants in six different subtypes. We develop an advanced mac… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.14669v1-abstract-full').style.display = 'inline'; document.getElementById('2005.14669v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2005.14669v1-abstract-full" style="display: none;"> Severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) infectivity is a major concern in coronavirus disease 2019 (COVID-19) prevention and economic reopening. However, rigorous determination of SARS-CoV-2 infectivity is essentially impossible owing to its continuous evolution with over 13752 single nucleotide polymorphisms (SNP) variants in six different subtypes. We develop an advanced machine learning algorithm based on the algebraic topology to quantitatively evaluate the binding affinity changes of SARS-CoV-2 spike glycoprotein (S protein) and host angiotensin-converting enzyme 2 (ACE2) receptor following the mutations. Based on mutation-induced binding affinity changes, we reveal that five out of six SARS-CoV-2 subtypes have become either moderately or slightly more infectious, while one subtype has weakened its infectivity. We find that SARS-CoV-2 is slightly more infectious than SARS-CoV according to computed S protein-ACE2 binding affinity changes. Based on a systematic evaluation of all possible 3686 future mutations on the S protein receptor-binding domain (RBD), we show that most likely future mutations will make SARS-CoV-2 more infectious. 
Combining sequence alignment, probability analysis, and binding affinity calculation, we predict that a few residues on the receptor-binding motif (RBM), i.e., 452, 489, 500, 501, and 505, have very high chances to mutate into significantly more infectious COVID-19 strains. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.14669v1-abstract-full').style.display = 'none'; document.getElementById('2005.14669v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 May, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">24 pages, 2 tables and 19 figures</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Wei%2C+G&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Wei%2C+G&start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Wei%2C+G&start=50" class="pagination-link " aria-label="Go to page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Wei%2C+G&start=100" class="pagination-link " aria-label="Go to page 3">3 </a> </li> </ul> </nav> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div 
class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" 
href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 
21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>