Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script 
src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." 
aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–32 of 32 results for author: <span class="mathjax">Ma, H</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/q-bio" role="search"> Searching in archive <strong>q-bio</strong>. 
<a href="/search/?searchtype=author&query=Ma%2C+H">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Ma, H"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Ma%2C+H&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div 
class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Ma, H"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13280">arXiv:2411.13280</a> <span> [<a href="https://arxiv.org/pdf/2411.13280">pdf</a>, <a href="https://arxiv.org/format/2411.13280">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Structure-Based Molecule Optimization via Gradient-Guided Bayesian Update </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Qiu%2C+K">Keyue Qiu</a>, <a href="/search/q-bio?searchtype=author&query=Song%2C+Y">Yuxuan Song</a>, <a href="/search/q-bio?searchtype=author&query=Yu%2C+J">Jie Yu</a>, <a href="/search/q-bio?searchtype=author&query=Ma%2C+H">Hongbo Ma</a>, <a href="/search/q-bio?searchtype=author&query=Cao%2C+Z">Ziyao Cao</a>, <a href="/search/q-bio?searchtype=author&query=Zhang%2C+Z">Zhilong Zhang</a>, <a href="/search/q-bio?searchtype=author&query=Wu%2C+Y">Yushuai Wu</a>, <a href="/search/q-bio?searchtype=author&query=Zheng%2C+M">Mingyue Zheng</a>, <a 
href="/search/q-bio?searchtype=author&query=Zhou%2C+H">Hao Zhou</a>, <a href="/search/q-bio?searchtype=author&query=Ma%2C+W">Wei-Ying Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13280v2-abstract-short" style="display: inline;"> Structure-based molecule optimization (SBMO) aims to optimize molecules with both continuous coordinates and discrete types against protein targets. A promising direction is to exert gradient guidance on generative models given its remarkable success in images, but it is challenging to guide discrete data and risks inconsistencies between modalities. To this end, we leverage a continuous and diffe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13280v2-abstract-full').style.display = 'inline'; document.getElementById('2411.13280v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13280v2-abstract-full" style="display: none;"> Structure-based molecule optimization (SBMO) aims to optimize molecules with both continuous coordinates and discrete types against protein targets. A promising direction is to exert gradient guidance on generative models given its remarkable success in images, but it is challenging to guide discrete data and risks inconsistencies between modalities. To this end, we leverage a continuous and differentiable space derived through Bayesian inference, presenting Molecule Joint Optimization (MolJO), the first gradient-based SBMO framework that facilitates joint guidance signals across different modalities while preserving SE(3)-equivariance. We introduce a novel backward correction strategy that optimizes within a sliding window of the past histories, allowing for a seamless trade-off between explore-and-exploit during optimization. 
Our proposed MolJO achieves state-of-the-art performance on CrossDocked2020 benchmark (Success Rate 51.3% , Vina Dock -9.05 and SA 0.78), more than 4x improvement in Success Rate compared to the gradient-based counterpart, and 2x "Me-Better" Ratio as much as 3D baselines. Furthermore, we extend MolJO to a wide range of optimization settings, including multi-objective optimization and challenging tasks in drug design such as R-group optimization and scaffold hopping, further underscoring its versatility and potential. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13280v2-abstract-full').style.display = 'none'; document.getElementById('2411.13280v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, 17 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.22472">arXiv:2410.22472</a> <span> [<a href="https://arxiv.org/pdf/2410.22472">pdf</a>, <a href="https://arxiv.org/format/2410.22472">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Learning Identifiable Factorized Causal Representations of Cellular Responses </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Mao%2C+H">Haiyi Mao</a>, <a href="/search/q-bio?searchtype=author&query=Lopez%2C+R">Romain Lopez</a>, <a href="/search/q-bio?searchtype=author&query=Liu%2C+K">Kai Liu</a>, <a href="/search/q-bio?searchtype=author&query=Huetter%2C+J">Jan-Christian Huetter</a>, <a href="/search/q-bio?searchtype=author&query=Richmond%2C+D">David Richmond</a>, <a href="/search/q-bio?searchtype=author&query=Benos%2C+P+V">Panayiotis V. Benos</a>, <a href="/search/q-bio?searchtype=author&query=Qiu%2C+L">Lin Qiu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.22472v2-abstract-short" style="display: inline;"> The study of cells and their responses to genetic or chemical perturbations promises to accelerate the discovery of therapeutic targets. 
However, designing adequate and insightful models for such data is difficult because the response of a cell to perturbations essentially depends on its biological context (e.g., genetic background or cell type). For example, while discovering therapeutic targets,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22472v2-abstract-full').style.display = 'inline'; document.getElementById('2410.22472v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.22472v2-abstract-full" style="display: none;"> The study of cells and their responses to genetic or chemical perturbations promises to accelerate the discovery of therapeutic targets. However, designing adequate and insightful models for such data is difficult because the response of a cell to perturbations essentially depends on its biological context (e.g., genetic background or cell type). For example, while discovering therapeutic targets, one may want to enrich for drugs that specifically target a certain cell type. This challenge emphasizes the need for methods that explicitly take into account potential interactions between drugs and contexts. Towards this goal, we propose a novel Factorized Causal Representation (FCR) learning method that reveals causal structure in single-cell perturbation data from several cell lines. Based on the framework of identifiable deep generative models, FCR learns multiple cellular representations that are disentangled, comprised of covariate-specific ($\mathbf{z}_x$), treatment-specific ($\mathbf{z}_{t}$), and interaction-specific ($\mathbf{z}_{tx}$) blocks. Based on recent advances in non-linear ICA theory, we prove the component-wise identifiability of $\mathbf{z}_{tx}$ and block-wise identifiability of $\mathbf{z}_t$ and $\mathbf{z}_x$. 
Then, we present our implementation of FCR, and empirically demonstrate that it outperforms state-of-the-art baselines in various tasks across four single-cell datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22472v2-abstract-full').style.display = 'none'; document.getElementById('2410.22472v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.00709">arXiv:2410.00709</a> <span> [<a href="https://arxiv.org/pdf/2410.00709">pdf</a>, <a href="https://arxiv.org/format/2410.00709">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Binding Affinity Prediction: From Conventional to Machine Learning-Based Approaches </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Liu%2C+X">Xuefeng Liu</a>, <a href="/search/q-bio?searchtype=author&query=Jiang%2C+S">Songhao Jiang</a>, <a href="/search/q-bio?searchtype=author&query=Duan%2C+X">Xiaotian Duan</a>, <a href="/search/q-bio?searchtype=author&query=Vasan%2C+A">Archit Vasan</a>, <a href="/search/q-bio?searchtype=author&query=Liu%2C+C">Chong Liu</a>, <a 
href="/search/q-bio?searchtype=author&query=Tien%2C+C">Chih-chan Tien</a>, <a href="/search/q-bio?searchtype=author&query=Ma%2C+H">Heng Ma</a>, <a href="/search/q-bio?searchtype=author&query=Brettin%2C+T">Thomas Brettin</a>, <a href="/search/q-bio?searchtype=author&query=Xia%2C+F">Fangfang Xia</a>, <a href="/search/q-bio?searchtype=author&query=Foster%2C+I+T">Ian T. Foster</a>, <a href="/search/q-bio?searchtype=author&query=Stevens%2C+R+L">Rick L. Stevens</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.00709v1-abstract-short" style="display: inline;"> Protein-ligand binding is the process by which a small molecule (drug or inhibitor) attaches to a target protein. The binding affinity, which refers to the strength of this interaction, is central to many important problems in bioinformatics such as drug design. An extensive amount of work has been devoted to predicting binding affinity over the past decades due to its significance. In this paper,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.00709v1-abstract-full').style.display = 'inline'; document.getElementById('2410.00709v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.00709v1-abstract-full" style="display: none;"> Protein-ligand binding is the process by which a small molecule (drug or inhibitor) attaches to a target protein. The binding affinity, which refers to the strength of this interaction, is central to many important problems in bioinformatics such as drug design. An extensive amount of work has been devoted to predicting binding affinity over the past decades due to its significance. In this paper, we review all significant recent works, focusing on the methods, features, and benchmark datasets. 
We have observed a rising trend in the use of traditional machine learning and deep learning models for predicting binding affinity, accompanied by an increasing amount of data on proteins and small drug-like molecules. While prediction results are constantly improving, we also identify several open questions and potential directions that remain unexplored in the field. This paper could serve as an excellent starting point for machine learning researchers who wish to engage in the study of binding affinity, or for anyone with general interests in machine learning, drug discovery, and bioinformatics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.00709v1-abstract-full').style.display = 'none'; document.getElementById('2410.00709v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.00473">arXiv:2410.00473</a> <span> [<a href="https://arxiv.org/pdf/2410.00473">pdf</a>, <a href="https://arxiv.org/format/2410.00473">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Uncertainty-aware t-distributed Stochastic Neighbor Embedding for Single-cell RNA-seq Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Ma%2C+H">Hui Ma</a>, <a href="/search/q-bio?searchtype=author&query=Chen%2C+K">Kai Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.00473v1-abstract-short" style="display: inline;"> Nonlinear data visualization using t-distributed stochastic neighbor embedding (t-SNE) enables the representation of complex single-cell transcriptomic landscapes in two or three dimensions to depict biological populations accurately. 
However, t-SNE often fails to account for uncertainties in the original dataset, leading to misleading visualizations where cell subsets with noise appear indistingu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.00473v1-abstract-full').style.display = 'inline'; document.getElementById('2410.00473v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.00473v1-abstract-full" style="display: none;"> Nonlinear data visualization using t-distributed stochastic neighbor embedding (t-SNE) enables the representation of complex single-cell transcriptomic landscapes in two or three dimensions to depict biological populations accurately. However, t-SNE often fails to account for uncertainties in the original dataset, leading to misleading visualizations where cell subsets with noise appear indistinguishable. To address these challenges, we introduce uncertainty-aware t-SNE (Ut-SNE), a noise-defending visualization tool tailored for uncertain single-cell RNA-seq data. By creating a probabilistic representation for each sample, Our Ut-SNE accurately incorporates noise about transcriptomic variability into the visual interpretation of single-cell RNA sequencing data, revealing significant uncertainties in transcriptomic variability. Through various examples, we showcase the practical value of Ut-SNE and underscore the significance of incorporating uncertainty awareness into data visualization practices. This versatile uncertainty-aware visualization tool can be easily adapted to other scientific domains beyond single-cell RNA sequencing, making them valuable resources for high-dimensional data analysis. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.00473v1-abstract-full').style.display = 'none'; document.getElementById('2410.00473v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.19113">arXiv:2406.19113</a> <span> [<a href="https://arxiv.org/pdf/2406.19113">pdf</a>, <a href="https://arxiv.org/format/2406.19113">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> </div> <p class="title is-5 mathjax"> MegIS: High-Performance, Energy-Efficient, and Low-Cost Metagenomic Analysis with In-Storage Processing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Ghiasi%2C+N+M">Nika Mansouri Ghiasi</a>, <a href="/search/q-bio?searchtype=author&query=Sadrosadati%2C+M">Mohammad Sadrosadati</a>, <a href="/search/q-bio?searchtype=author&query=Mustafa%2C+H">Harun Mustafa</a>, <a href="/search/q-bio?searchtype=author&query=Gollwitzer%2C+A">Arvid Gollwitzer</a>, <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Eudine%2C+J">Julien Eudine</a>, <a href="/search/q-bio?searchtype=author&query=Mao%2C+H">Haiyu Mao</a>, <a href="/search/q-bio?searchtype=author&query=Lindegger%2C+J">Joël Lindegger</a>, <a 
href="/search/q-bio?searchtype=author&query=Cavlak%2C+M+B">Meryem Banu Cavlak</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Park%2C+J">Jisung Park</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.19113v1-abstract-short" style="display: inline;"> Metagenomics has led to significant advances in many fields. Metagenomic analysis commonly involves the key tasks of determining the species present in a sample and their relative abundances. These tasks require searching large metagenomic databases. Metagenomic analysis suffers from significant data movement overhead due to moving large amounts of low-reuse data from the storage system. In-storag… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19113v1-abstract-full').style.display = 'inline'; document.getElementById('2406.19113v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.19113v1-abstract-full" style="display: none;"> Metagenomics has led to significant advances in many fields. Metagenomic analysis commonly involves the key tasks of determining the species present in a sample and their relative abundances. These tasks require searching large metagenomic databases. Metagenomic analysis suffers from significant data movement overhead due to moving large amounts of low-reuse data from the storage system. In-storage processing can be a fundamental solution for reducing this overhead. However, designing an in-storage processing system for metagenomics is challenging because existing approaches to metagenomic analysis cannot be directly implemented in storage effectively due to the hardware limitations of modern SSDs. 
We propose MegIS, the first in-storage processing system designed to significantly reduce the data movement overhead of the end-to-end metagenomic analysis pipeline. MegIS is enabled by our lightweight design that effectively leverages and orchestrates processing inside and outside the storage system. We address in-storage processing challenges for metagenomics via specialized and efficient 1) task partitioning, 2) data/computation flow coordination, 3) storage technology-aware algorithmic optimizations, 4) data mapping, and 5) lightweight in-storage accelerators. MegIS's design is flexible, capable of supporting different types of metagenomic input datasets, and can be integrated into various metagenomic analysis pipelines. Our evaluation shows that MegIS outperforms the state-of-the-art performance- and accuracy-optimized software metagenomic tools by 2.7$\times$-37.2$\times$ and 6.9$\times$-100.2$\times$, respectively, while matching the accuracy of the accuracy-optimized tool. MegIS achieves 1.5$\times$-5.1$\times$ speedup compared to the state-of-the-art metagenomic hardware-accelerated (using processing-in-memory) tool, while achieving significantly higher accuracy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19113v1-abstract-full').style.display = 'none'; document.getElementById('2406.19113v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in ISCA 2024. 
arXiv admin note: substantial text overlap with arXiv:2311.12527</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.14842">arXiv:2406.14842</a> <span> [<a href="https://arxiv.org/pdf/2406.14842">pdf</a>, <a href="https://arxiv.org/format/2406.14842">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Online t-SNE for single-cell RNA-seq </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Ma%2C+H">Hui Ma</a>, <a href="/search/q-bio?searchtype=author&query=Chen%2C+K">Kai Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.14842v1-abstract-short" style="display: inline;"> Due to the sequential sample arrival, changing experiment conditions, and evolution of knowledge, the demand to continually visualize evolving structures of sequential and diverse single-cell RNA-sequencing (scRNA-seq) data becomes indispensable. 
However, as one of the state-of-the-art visualization and analysis methods for scRNA-seq, t-distributed stochastic neighbor embedding (t-SNE) merely visu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.14842v1-abstract-full').style.display = 'inline'; document.getElementById('2406.14842v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.14842v1-abstract-full" style="display: none;"> Due to the sequential sample arrival, changing experiment conditions, and evolution of knowledge, the demand to continually visualize evolving structures of sequential and diverse single-cell RNA-sequencing (scRNA-seq) data becomes indispensable. However, as one of the state-of-the-art visualization and analysis methods for scRNA-seq, t-distributed stochastic neighbor embedding (t-SNE) merely visualizes static scRNA-seq data offline and fails to meet the demand well. To address these challenges, we introduce online t-SNE to seamlessly integrate sequential scRNA-seq data. Online t-SNE achieves this by leveraging the embedding space of old samples, exploring the embedding space of new samples, and aligning the two embedding spaces on the fly. Consequently, online t-SNE dramatically enables the continual discovery of new structures and high-quality visualization of new scRNA-seq data without retraining from scratch. We showcase the formidable visualization capabilities of online t-SNE across diverse sequential scRNA-seq datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.14842v1-abstract-full').style.display = 'none'; document.getElementById('2406.14842v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.08961">arXiv:2406.08961</a> <span> [<a href="https://arxiv.org/pdf/2406.08961">pdf</a>, <a href="https://arxiv.org/format/2406.08961">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> SIU: A Million-Scale Structural Small Molecule-Protein Interaction Dataset for Unbiased Bioactivity Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Huang%2C+Y">Yanwen Huang</a>, <a href="/search/q-bio?searchtype=author&query=Gao%2C+B">Bowen Gao</a>, <a href="/search/q-bio?searchtype=author&query=Jia%2C+Y">Yinjun Jia</a>, <a href="/search/q-bio?searchtype=author&query=Ma%2C+H">Hongbo Ma</a>, <a href="/search/q-bio?searchtype=author&query=Ma%2C+W">Wei-Ying Ma</a>, <a href="/search/q-bio?searchtype=author&query=Zhang%2C+Y">Ya-Qin Zhang</a>, <a href="/search/q-bio?searchtype=author&query=Lan%2C+Y">Yanyan Lan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.08961v1-abstract-short" style="display: inline;"> Small molecules play a pivotal role in modern medicine, and scrutinizing their interactions with protein targets is essential for the discovery and development of novel, life-saving therapeutics. The term "bioactivity" encompasses various biological effects resulting from these interactions, including both binding and functional responses. 
The magnitude of bioactivity dictates the therapeutic or t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.08961v1-abstract-full').style.display = 'inline'; document.getElementById('2406.08961v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.08961v1-abstract-full" style="display: none;"> Small molecules play a pivotal role in modern medicine, and scrutinizing their interactions with protein targets is essential for the discovery and development of novel, life-saving therapeutics. The term "bioactivity" encompasses various biological effects resulting from these interactions, including both binding and functional responses. The magnitude of bioactivity dictates the therapeutic or toxic pharmacological outcomes of small molecules, rendering accurate bioactivity prediction crucial for the development of safe and effective drugs. However, existing structural datasets of small molecule-protein interactions are often limited in scale and lack systematically organized bioactivity labels, thereby impeding our understanding of these interactions and precise bioactivity prediction. In this study, we introduce a comprehensive dataset of small molecule-protein interactions, consisting of over a million binding structures, each annotated with real biological activity labels. This dataset is designed to facilitate unbiased bioactivity prediction. We evaluated several classical models on this dataset, and the results demonstrate that the task of unbiased bioactivity prediction is challenging yet essential. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.08961v1-abstract-full').style.display = 'none'; document.getElementById('2406.08961v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.00670">arXiv:2404.00670</a> <span> [<a href="https://arxiv.org/pdf/2404.00670">pdf</a>, <a href="https://arxiv.org/format/2404.00670">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> </div> </div> <p class="title is-5 mathjax"> Statistical Analysis by Semiparametric Additive Regression and LSTM-FCN Based Hierarchical Classification for Computer Vision Quantification of Parkinsonian Bradykinesia </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Cho%2C+Y">Youngseo Cho</a>, <a href="/search/q-bio?searchtype=author&query=Kwak%2C+I+H">In Hee Kwak</a>, <a href="/search/q-bio?searchtype=author&query=Kim%2C+D">Dohyeon Kim</a>, <a href="/search/q-bio?searchtype=author&query=Na%2C+J">Jinhee Na</a>, <a href="/search/q-bio?searchtype=author&query=Sung%2C+H">Hanjoo Sung</a>, <a href="/search/q-bio?searchtype=author&query=Lee%2C+J">Jeongjae Lee</a>, <a href="/search/q-bio?searchtype=author&query=Kim%2C+Y+E">Young Eun Kim</a>, <a href="/search/q-bio?searchtype=author&query=Ma%2C+H">Hyeo-il Ma</a> 
</p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.00670v1-abstract-short" style="display: inline;"> Bradykinesia, characterized by involuntary slowing or decrement of movement, is a fundamental symptom of Parkinson's Disease (PD) and is vital for its clinical diagnosis. Despite various methodologies explored to quantify bradykinesia, computer vision-based approaches have shown promising results. However, these methods often fall short in adequately addressing key bradykinesia characteristics in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.00670v1-abstract-full').style.display = 'inline'; document.getElementById('2404.00670v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.00670v1-abstract-full" style="display: none;"> Bradykinesia, characterized by involuntary slowing or decrement of movement, is a fundamental symptom of Parkinson's Disease (PD) and is vital for its clinical diagnosis. Despite various methodologies explored to quantify bradykinesia, computer vision-based approaches have shown promising results. However, these methods often fall short in adequately addressing key bradykinesia characteristics in repetitive limb movements: "occasional arrest" and "decrement in amplitude." This research advances vision-based quantification of bradykinesia by introducing nuanced numerical analysis to capture decrement in amplitudes and employing a simple deep learning technique, LSTM-FCN, for precise classification of occasional arrests. Our approach structures the classification process hierarchically, tailoring it to the unique dynamics of bradykinesia in PD. Statistical analysis of the extracted features, including those representing arrest and fatigue, has demonstrated their statistical significance in most cases. 
This finding underscores the importance of considering "occasional arrest" and "decrement in amplitude" in bradykinesia quantification of limb movement. Our enhanced diagnostic tool has been rigorously tested on an extensive dataset comprising 1396 motion videos from 310 PD patients, achieving an accuracy of 80.3%. The results confirm the robustness and reliability of our method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.00670v1-abstract-full').style.display = 'none'; document.getElementById('2404.00670v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.12527">arXiv:2311.12527</a> <span> [<a href="https://arxiv.org/pdf/2311.12527">pdf</a>, <a href="https://arxiv.org/format/2311.12527">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> MetaStore: High-Performance Metagenomic Analysis via In-Storage Computing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Ghiasi%2C+N+M">Nika Mansouri Ghiasi</a>, <a href="/search/q-bio?searchtype=author&query=Sadrosadati%2C+M">Mohammad Sadrosadati</a>, <a href="/search/q-bio?searchtype=author&query=Mustafa%2C+H">Harun Mustafa</a>, <a href="/search/q-bio?searchtype=author&query=Gollwitzer%2C+A">Arvid 
Gollwitzer</a>, <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Eudine%2C+J">Julien Eudine</a>, <a href="/search/q-bio?searchtype=author&query=Ma%2C+H">Haiyu Ma</a>, <a href="/search/q-bio?searchtype=author&query=Lindegger%2C+J">Joël Lindegger</a>, <a href="/search/q-bio?searchtype=author&query=Cavlak%2C+M+B">Meryem Banu Cavlak</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Park%2C+J">Jisung Park</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.12527v1-abstract-short" style="display: inline;"> Metagenomics has led to significant advancements in many fields. Metagenomic analysis commonly involves the key tasks of determining the species present in a sample and their relative abundances. These tasks require searching large metagenomic databases containing information on different species' genomes. Metagenomic analysis suffers from significant data movement overhead due to moving large amo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.12527v1-abstract-full').style.display = 'inline'; document.getElementById('2311.12527v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.12527v1-abstract-full" style="display: none;"> Metagenomics has led to significant advancements in many fields. Metagenomic analysis commonly involves the key tasks of determining the species present in a sample and their relative abundances. These tasks require searching large metagenomic databases containing information on different species' genomes. 
Metagenomic analysis suffers from significant data movement overhead due to moving large amounts of low-reuse data from the storage system to the rest of the system. In-storage processing can be a fundamental solution for reducing data movement overhead. However, designing an in-storage processing system for metagenomics is challenging because none of the existing approaches can be directly implemented in storage effectively due to the hardware limitations of modern SSDs. We propose MetaStore, the first in-storage processing system designed to significantly reduce the data movement overhead of end-to-end metagenomic analysis. MetaStore is enabled by our lightweight and cooperative design that effectively leverages and orchestrates processing inside and outside the storage system. Through our detailed analysis of the end-to-end metagenomic analysis pipeline and careful hardware/software co-design, we address in-storage processing challenges for metagenomics via specialized and efficient 1) task partitioning, 2) data/computation flow coordination, 3) storage technology-aware algorithmic optimizations, 4) light-weight in-storage accelerators, and 5) data mapping. Our evaluation shows that MetaStore outperforms the state-of-the-art performance- and accuracy-optimized software metagenomic tools by 2.7-37.2$\times$ and 6.9-100.2$\times$, respectively, while matching the accuracy of the accuracy-optimized tool. MetaStore achieves 1.5-5.1$\times$ speedup compared to the state-of-the-art metagenomic hardware-accelerated tool, while achieving significantly higher accuracy. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.12527v1-abstract-full').style.display = 'none'; document.getElementById('2311.12527v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.04366">arXiv:2310.04366</a> <span> [<a href="https://arxiv.org/pdf/2310.04366">pdf</a>, <a href="https://arxiv.org/format/2310.04366">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> </div> <p class="title is-5 mathjax"> Swordfish: A Framework for Evaluating Deep Neural Network-based Basecalling using Computation-In-Memory with Non-Ideal Memristors </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Shahroodi%2C+T">Taha Shahroodi</a>, <a href="/search/q-bio?searchtype=author&query=Singh%2C+G">Gagandeep Singh</a>, <a href="/search/q-bio?searchtype=author&query=Zahedi%2C+M">Mahdi Zahedi</a>, <a href="/search/q-bio?searchtype=author&query=Mao%2C+H">Haiyu Mao</a>, <a href="/search/q-bio?searchtype=author&query=Lindegger%2C+J">Joel Lindegger</a>, <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Wong%2C+S">Stephan Wong</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a>, <a 
href="/search/q-bio?searchtype=author&query=Hamdioui%2C+S">Said Hamdioui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.04366v2-abstract-short" style="display: inline;"> Basecalling, an essential step in many genome analysis studies, relies on large Deep Neural Networks (DNNs) to achieve high accuracy. Unfortunately, these DNNs are computationally slow and inefficient, leading to considerable delays and resource constraints in the sequence analysis process. A Computation-In-Memory (CIM) architecture using memristors can significantly accelerate the performance of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.04366v2-abstract-full').style.display = 'inline'; document.getElementById('2310.04366v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.04366v2-abstract-full" style="display: none;"> Basecalling, an essential step in many genome analysis studies, relies on large Deep Neural Networks (DNNs) to achieve high accuracy. Unfortunately, these DNNs are computationally slow and inefficient, leading to considerable delays and resource constraints in the sequence analysis process. A Computation-In-Memory (CIM) architecture using memristors can significantly accelerate the performance of DNNs. However, inherent device non-idealities and architectural limitations of such designs can greatly degrade the basecalling accuracy, which is critical for accurate genome analysis. To facilitate the adoption of memristor-based CIM designs for basecalling, it is important to (1) conduct a comprehensive analysis of potential CIM architectures and (2) develop effective strategies for mitigating the possible adverse effects of inherent device non-idealities and architectural limitations. 
This paper proposes Swordfish, a novel hardware/software co-design framework that can effectively address the two aforementioned issues. Swordfish incorporates seven circuit and device restrictions or non-idealities from characterized real memristor-based chips. Swordfish leverages various hardware/software co-design solutions to mitigate the basecalling accuracy loss due to such non-idealities. To demonstrate the effectiveness of Swordfish, we take Bonito, the state-of-the-art (i.e., accurate and fast), open-source basecaller as a case study. Our experimental results using Swordfish show that a CIM architecture can realistically accelerate Bonito for a wide range of real datasets by an average of 25.7x, with an accuracy loss of 6.01%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.04366v2-abstract-full').style.display = 'none'; document.getElementById('2310.04366v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in 56th IEEE/ACM International Symposium on Microarchitecture (MICRO), 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2301.09200">arXiv:2301.09200</a> <span> [<a href="https://arxiv.org/pdf/2301.09200">pdf</a>, <a href="https://arxiv.org/format/2301.09200">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1093/bioinformatics/btad272">10.1093/bioinformatics/btad272 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> RawHash: Enabling Fast and Accurate Real-Time Analysis of Raw Nanopore Signals for Large Genomes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Ghiasi%2C+N+M">Nika Mansouri Ghiasi</a>, <a href="/search/q-bio?searchtype=author&query=Lindegger%2C+J">Joel Lindegger</a>, <a href="/search/q-bio?searchtype=author&query=Singh%2C+G">Gagandeep Singh</a>, <a href="/search/q-bio?searchtype=author&query=Cavlak%2C+M+B">Meryem Banu Cavlak</a>, <a href="/search/q-bio?searchtype=author&query=Mao%2C+H">Haiyu Mao</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2301.09200v4-abstract-short" 
style="display: inline;"> Nanopore sequencers generate electrical raw signals in real-time while sequencing long genomic strands. These raw signals can be analyzed as they are generated, providing an opportunity for real-time genome analysis. An important feature of nanopore sequencing, Read Until, can eject strands from sequencers without fully sequencing them, which provides opportunities to computationally reduce the se… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.09200v4-abstract-full').style.display = 'inline'; document.getElementById('2301.09200v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2301.09200v4-abstract-full" style="display: none;"> Nanopore sequencers generate electrical raw signals in real-time while sequencing long genomic strands. These raw signals can be analyzed as they are generated, providing an opportunity for real-time genome analysis. An important feature of nanopore sequencing, Read Until, can eject strands from sequencers without fully sequencing them, which provides opportunities to computationally reduce the sequencing time and cost. However, existing works utilizing Read Until either 1) require powerful computational resources that may not be available for portable sequencers or 2) lack scalability for large genomes, rendering them inaccurate or ineffective. We propose RawHash, the first mechanism that can accurately and efficiently perform real-time analysis of nanopore raw signals for large genomes using a hash-based similarity search. To enable this, RawHash ensures the signals corresponding to the same DNA content lead to the same hash value, regardless of the slight variations in these signals. 
RawHash achieves an accurate hash-based similarity search via an effective quantization of the raw signals such that signals corresponding to the same DNA content have the same quantized value and, subsequently, the same hash value. We evaluate RawHash on three applications: 1) read mapping, 2) relative abundance estimation, and 3) contamination analysis. Our evaluations show that RawHash is the only tool that can provide high accuracy and high throughput for analyzing large genomes in real-time. When compared to the state-of-the-art techniques, UNCALLED and Sigmap, RawHash provides 1) 25.8x and 3.4x better average throughput and 2) significantly better accuracy for large genomes, respectively. Source code is available at https://github.com/CMU-SAFARI/RawHash. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.09200v4-abstract-full').style.display = 'none'; document.getElementById('2301.09200v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in proceedings of ISMB/ECCB 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.01663">arXiv:2212.01663</a> <span> [<a href="https://arxiv.org/pdf/2212.01663">pdf</a>, <a href="https://arxiv.org/format/2212.01663">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biological Physics">physics.bio-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1162/neco_a_01612">10.1162/neco_a_01612 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Self-organization of nonlinearly coupled neural fluctuations into synergistic population codes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Ma%2C+H">Hengyuan Ma</a>, <a href="/search/q-bio?searchtype=author&query=Qi%2C+Y">Yang Qi</a>, <a href="/search/q-bio?searchtype=author&query=Gong%2C+P">Pulin Gong</a>, <a href="/search/q-bio?searchtype=author&query=Zhang%2C+J">Jie Zhang</a>, <a href="/search/q-bio?searchtype=author&query=Lu%2C+W">Wenlian Lu</a>, <a href="/search/q-bio?searchtype=author&query=Feng%2C+J">Jianfeng Feng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.01663v3-abstract-short" style="display: inline;"> Neural activity in the brain exhibits 
correlated fluctuations that may strongly influence the properties of neural population coding. However, how such correlated neural fluctuations may arise from the intrinsic neural circuit dynamics and subsequently affect the computational properties of neural population activity remains poorly understood. The main difficulty lies in resolving the nonlinear co… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.01663v3-abstract-full').style.display = 'inline'; document.getElementById('2212.01663v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.01663v3-abstract-full" style="display: none;"> Neural activity in the brain exhibits correlated fluctuations that may strongly influence the properties of neural population coding. However, how such correlated neural fluctuations may arise from the intrinsic neural circuit dynamics and subsequently affect the computational properties of neural population activity remains poorly understood. The main difficulty lies in resolving the nonlinear coupling between correlated fluctuations with the overall dynamics of the system. In this study, we investigate the emergence of synergistic neural population codes from the intrinsic dynamics of correlated neural fluctuations in a neural circuit model capturing realistic nonlinear noise coupling of spiking neurons. We show that a rich repertoire of spatial correlation patterns naturally emerges in a bump attractor network and further reveals the dynamical regime under which the interplay between differential and noise correlations leads to synergistic codes. Moreover, we find that negative correlations may induce stable bound states between two bumps, a phenomenon previously unobserved in firing rate models. 
These noise-induced effects of bump attractors lead to a number of computational advantages including enhanced working memory capacity and efficient spatiotemporal multiplexing and can account for a range of cognitive and behavioral phenomena related to working memory. This study offers a dynamical approach to investigating realistic correlated neural fluctuations and insights to their roles in cortical computations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.01663v3-abstract-full').style.display = 'none'; document.getElementById('2212.01663v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">code is available at https://github.com/AwakerMhy/moment_nn/tree/main</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Neural Comput. 
2023 Sep 19:1-30 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.01542">arXiv:2212.01542</a> <span> [<a href="https://arxiv.org/pdf/2212.01542">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> </div> </div> <p class="title is-5 mathjax"> Detection of brain activations induced by naturalistic stimuli in a pseudo model-driven way </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Liu%2C+J">Jiangcong Liu</a>, <a href="/search/q-bio?searchtype=author&query=Ma%2C+H">Hao Ma</a>, <a href="/search/q-bio?searchtype=author&query=Guan%2C+Y">Yun Guan</a>, <a href="/search/q-bio?searchtype=author&query=Wu%2C+F">Fan Wu</a>, <a href="/search/q-bio?searchtype=author&query=Xu%2C+L">Le Xu</a>, <a href="/search/q-bio?searchtype=author&query=Zhang%2C+Y">Yang Zhang</a>, <a href="/search/q-bio?searchtype=author&query=Tian%2C+L">Lixia Tian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.01542v1-abstract-short" style="display: inline;"> Naturalistic fMRI has been suggested to be a powerful alternative for investigations of human brain function. Stimulus-induced activation has been playing an essential role in fMRI-based brain function analyses. 
Due to the complexity of the stimuli, however, detection of activations induced by naturalistic stimuli (AINSs) has been a tricky problem, as AINS cannot be detected simply in a model-driv… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.01542v1-abstract-full').style.display = 'inline'; document.getElementById('2212.01542v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.01542v1-abstract-full" style="display: none;"> Naturalistic fMRI has been suggested to be a powerful alternative for investigations of human brain function. Stimulus-induced activation has been playing an essential role in fMRI-based brain function analyses. Due to the complexity of the stimuli, however, detection of activations induced by naturalistic stimuli (AINSs) has been a tricky problem, as AINS cannot be detected simply in a model-driven way. In this study, we proposed a method to detect AINS in a pseudo model-driven way. Inspired by the strategy of utilizing the commonalities among the brains exposed to the same stimuli for inter-subject correlation analysis, we established response models for one subject by averaging the fMRI signals across several other subjects, and then detected AINSs of the subject using general linear model. We evaluated the effectiveness of AINS with both statistical and predictive analyses on individual differences in sex and intelligence quotient (IQ), based on the four movie fMRI runs included in the Human Connectome Project dataset. The results indicate that AINS is not only sensitive to sex- and IQ-related differences, but also specific enough to decode individuals' sex and IQ. 
Specifically, activations in brain regions associated with visual-spatial processing were observed to be consistently stronger in the males, and individuals with higher IQ exhibited consistently stronger activations in regions within the visual and the default mode networks. Predictions of individuals' sex and IQ were significantly better than those based on random labels (P &lt; 0.005). Taken together, AINS advanced in this study can be an effective evaluation of human brain function. The conceptual simplicity and easy application of its detection may make AINS a favorable choice for future brain function analyses and personalized medicine based on naturalistic fMRI. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.01542v1-abstract-full').style.display = 'none'; document.getElementById('2212.01542v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">39 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.08600">arXiv:2209.08600</a> <span> [<a href="https://arxiv.org/pdf/2209.08600">pdf</a>, <a href="https://arxiv.org/format/2209.08600">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> </div> <p class="title is-5 mathjax"> GenPIP: In-Memory Acceleration of Genome Analysis via Tight Integration of Basecalling and Read Mapping </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Mao%2C+H">Haiyu Mao</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Sadrosadati%2C+M">Mohammad Sadrosadati</a>, <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Baranwal%2C+A">Akanksha Baranwal</a>, <a href="/search/q-bio?searchtype=author&query=Cali%2C+D+S">Damla Senol Cali</a>, <a href="/search/q-bio?searchtype=author&query=Manglik%2C+A">Aditya Manglik</a>, <a href="/search/q-bio?searchtype=author&query=Alserr%2C+N+A">Nour Almadhoun Alserr</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.08600v2-abstract-short" style="display: 
inline;"> Nanopore sequencing is a widely-used high-throughput genome sequencing technology that can sequence long fragments of a genome into raw electrical signals at low cost. Nanopore sequencing requires two computationally-costly processing steps for accurate downstream genome analysis. The first step, basecalling, translates the raw electrical signals into nucleotide bases (i.e., A, C, G, T). The secon… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.08600v2-abstract-full').style.display = 'inline'; document.getElementById('2209.08600v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.08600v2-abstract-full" style="display: none;"> Nanopore sequencing is a widely-used high-throughput genome sequencing technology that can sequence long fragments of a genome into raw electrical signals at low cost. Nanopore sequencing requires two computationally-costly processing steps for accurate downstream genome analysis. The first step, basecalling, translates the raw electrical signals into nucleotide bases (i.e., A, C, G, T). The second step, read mapping, finds the correct location of a read in a reference genome. In existing genome analysis pipelines, basecalling and read mapping are executed separately. We observe in this work that such separate execution of the two most time-consuming steps inherently leads to (1) significant data movement and (2) redundant computations on the data, slowing down the genome analysis pipeline. This paper proposes GenPIP, an in-memory genome analysis accelerator that tightly integrates basecalling and read mapping. 
GenPIP improves the performance of the genome analysis pipeline with two key mechanisms: (1) in-memory fine-grained collaborative execution of the major genome analysis steps in parallel; (2) a new technique for early-rejection of low-quality and unmapped reads to timely stop the execution of genome analysis for such reads, reducing inefficient computation. Our experiments show that, for the execution of the genome analysis pipeline, GenPIP provides 41.6X (8.4X) speedup and 32.8X (20.8X) energy savings with negligible accuracy loss compared to the state-of-the-art software genome analysis tools executed on a state-of-the-art CPU (GPU). Compared to a design that combines state-of-the-art in-memory basecalling and read mapping accelerators, GenPIP provides 1.39X speedup and 1.37X energy savings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.08600v2-abstract-full').style.display = 'none'; document.getElementById('2209.08600v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 13 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.07734">arXiv:2207.07734</a> <span> [<a href="https://arxiv.org/pdf/2207.07734">pdf</a>, <a href="https://arxiv.org/format/2207.07734">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="General Literature">cs.GL</span> </div> </div> <p class="title is-5 mathjax"> COEM: Cross-Modal Embedding for MetaCell Identification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Mao%2C+H">Haiyi Mao</a>, <a href="/search/q-bio?searchtype=author&query=Jia%2C+M">Minxue Jia</a>, <a href="/search/q-bio?searchtype=author&query=Dou%2C+J+X">Jason Xiaotian Dou</a>, <a href="/search/q-bio?searchtype=author&query=Zhang%2C+H">Haotian Zhang</a>, <a href="/search/q-bio?searchtype=author&query=Benos%2C+P+V">Panayiotis V. Benos</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.07734v2-abstract-short" style="display: inline;"> Metacells are disjoint and homogeneous groups of single-cell profiles, representing discrete and highly granular cell states. Existing metacell algorithms tend to use only one modality to infer metacells, even though single-cell multi-omics datasets profile multiple molecular modalities within the same cell. 
Here, we present \textbf{C}ross-M\textbf{O}dal \textbf{E}mbedding for \textbf{M}etaCell Id… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.07734v2-abstract-full').style.display = 'inline'; document.getElementById('2207.07734v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.07734v2-abstract-full" style="display: none;"> Metacells are disjoint and homogeneous groups of single-cell profiles, representing discrete and highly granular cell states. Existing metacell algorithms tend to use only one modality to infer metacells, even though single-cell multi-omics datasets profile multiple molecular modalities within the same cell. Here, we present \textbf{C}ross-M\textbf{O}dal \textbf{E}mbedding for \textbf{M}etaCell Identification (COEM), which utilizes an embedded space leveraging the information of both scATAC-seq and scRNA-seq to perform aggregation, balancing the trade-off between fine resolution and sufficient sequencing coverage. COEM outperforms the state-of-the-art method SEACells by efficiently identifying accurate and well-separated metacells across datasets with continuous and discrete cell types. Furthermore, COEM significantly improves peak-to-gene association analyses, and facilitates complex gene regulatory inference tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.07734v2-abstract-full').style.display = 'none'; document.getElementById('2207.07734v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 2 figures, ICML workshop on computational biology</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.07957">arXiv:2205.07957</a> <span> [<a href="https://arxiv.org/pdf/2205.07957">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Going From Molecules to Genomic Variations to Scientific Discovery: Intelligent Algorithms and Architectures for Intelligent Genome Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Lindegger%2C+J">Joel Lindegger</a>, <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Almadhoun%2C+N">Nour Almadhoun</a>, <a href="/search/q-bio?searchtype=author&query=Mao%2C+H">Haiyu Mao</a>, <a href="/search/q-bio?searchtype=author&query=Singh%2C+G">Gagandeep Singh</a>, <a href="/search/q-bio?searchtype=author&query=Gomez-Luna%2C+J">Juan Gomez-Luna</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.07957v1-abstract-short" style="display: inline;"> We now need more than ever to make genome analysis more intelligent. 
We need to read, analyze, and interpret our genomes not only quickly, but also accurately and efficiently enough to scale the analysis to population level. There currently exist major computational bottlenecks and inefficiencies throughout the entire genome analysis pipeline, because state-of-the-art genome sequencing technologie… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.07957v1-abstract-full').style.display = 'inline'; document.getElementById('2205.07957v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.07957v1-abstract-full" style="display: none;"> We now need more than ever to make genome analysis more intelligent. We need to read, analyze, and interpret our genomes not only quickly, but also accurately and efficiently enough to scale the analysis to population level. There currently exist major computational bottlenecks and inefficiencies throughout the entire genome analysis pipeline, because state-of-the-art genome sequencing technologies are still not able to read a genome in its entirety. We describe the ongoing journey in significantly improving the performance, accuracy, and efficiency of genome analysis using intelligent algorithms and hardware architectures. We explain state-of-the-art algorithmic methods and hardware-based acceleration approaches for each step of the genome analysis pipeline and provide experimental evaluations. Algorithmic approaches exploit the structure of the genome as well as the structure of the underlying hardware. Hardware-based acceleration approaches exploit specialized microarchitectures or various execution paradigms (e.g., processing inside or near memory) along with algorithmic changes, leading to new hardware/software co-designed systems. 
We conclude with a foreshadowing of future challenges, benefits, and research directions triggered by the development of both very low cost yet highly error prone new sequencing technologies and specialized hardware chips for genomics. We hope that these efforts and the challenges we discuss provide a foundation for future work in making genome analysis more intelligent. The analysis script and data used in our experimental evaluation are available at: https://github.com/CMU-SAFARI/Molecules2Variations <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.07957v1-abstract-full').style.display = 'none'; document.getElementById('2205.07957v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: text overlap with arXiv:2008.00961</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.14006">arXiv:2203.14006</a> <span> [<a href="https://arxiv.org/pdf/2203.14006">pdf</a>, <a href="https://arxiv.org/format/2203.14006">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Dynamical Systems">math.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Analysis, Statistics and Probability">physics.data-an</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 
mathjax"> Continuity scaling: A rigorous framework for detecting and quantifying causality accurately </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Ying%2C+X">Xiong Ying</a>, <a href="/search/q-bio?searchtype=author&query=Leng%2C+S">Si-Yang Leng</a>, <a href="/search/q-bio?searchtype=author&query=Ma%2C+H">Huan-Fei Ma</a>, <a href="/search/q-bio?searchtype=author&query=Nie%2C+Q">Qing Nie</a>, <a href="/search/q-bio?searchtype=author&query=Lai%2C+Y">Ying-Cheng Lai</a>, <a href="/search/q-bio?searchtype=author&query=Lin%2C+W">Wei Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.14006v1-abstract-short" style="display: inline;"> Data based detection and quantification of causation in complex, nonlinear dynamical systems is of paramount importance to science, engineering and beyond. Inspired by the widely used methodology in recent years, the cross-map-based techniques, we develop a general framework to advance towards a comprehensive understanding of dynamical causal mechanisms, which is consistent with the natural interp… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.14006v1-abstract-full').style.display = 'inline'; document.getElementById('2203.14006v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.14006v1-abstract-full" style="display: none;"> Data based detection and quantification of causation in complex, nonlinear dynamical systems is of paramount importance to science, engineering and beyond. 
Inspired by the widely used methodology in recent years, the cross-map-based techniques, we develop a general framework to advance towards a comprehensive understanding of dynamical causal mechanisms, which is consistent with the natural interpretation of causality. In particular, instead of measuring the smoothness of the cross map as conventionally implemented, we define causation through measuring the {\it scaling law} for the continuity of the investigated dynamical system directly. The uncovered scaling law enables accurate, reliable, and efficient detection of causation and assessment of its strength in general complex dynamical systems, outperforming those existing representative methods. The continuity scaling based framework is rigorously established and demonstrated using datasets from model complex systems and the real world. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.14006v1-abstract-full').style.display = 'none'; document.getElementById('2203.14006v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 figures; The article has been peer reviewed and accepted by RESEARCH</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.10400">arXiv:2202.10400</a> <span> [<a href="https://arxiv.org/pdf/2202.10400">pdf</a>, <a href="https://arxiv.org/format/2202.10400">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Operating Systems">cs.OS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> </div> <p class="title is-5 mathjax"> GenStore: A High-Performance and Energy-Efficient In-Storage Computing System for Genome Sequence Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Ghiasi%2C+N+M">Nika Mansouri Ghiasi</a>, <a href="/search/q-bio?searchtype=author&query=Park%2C+J">Jisung Park</a>, <a href="/search/q-bio?searchtype=author&query=Mustafa%2C+H">Harun Mustafa</a>, <a href="/search/q-bio?searchtype=author&query=Kim%2C+J">Jeremie Kim</a>, <a href="/search/q-bio?searchtype=author&query=Olgun%2C+A">Ataberk Olgun</a>, <a href="/search/q-bio?searchtype=author&query=Gollwitzer%2C+A">Arvid Gollwitzer</a>, <a href="/search/q-bio?searchtype=author&query=Cali%2C+D+S">Damla Senol Cali</a>, <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Mao%2C+H">Haiyu Mao</a>, <a 
href="/search/q-bio?searchtype=author&query=Alserr%2C+N+A">Nour Almadhoun Alserr</a>, <a href="/search/q-bio?searchtype=author&query=Ausavarungnirun%2C+R">Rachata Ausavarungnirun</a>, <a href="/search/q-bio?searchtype=author&query=Vijaykumar%2C+N">Nandita Vijaykumar</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.10400v2-abstract-short" style="display: inline;"> Read mapping is a fundamental, yet computationally-expensive step in many genomics applications. It is used to identify potential matches and differences between fragments (called reads) of a sequenced genome and an already known genome (called a reference genome). To address the computational challenges in genome analysis, many prior works propose various approaches such as filters that select th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.10400v2-abstract-full').style.display = 'inline'; document.getElementById('2202.10400v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.10400v2-abstract-full" style="display: none;"> Read mapping is a fundamental, yet computationally-expensive step in many genomics applications. It is used to identify potential matches and differences between fragments (called reads) of a sequenced genome and an already known genome (called a reference genome). To address the computational challenges in genome analysis, many prior works propose various approaches such as filters that select the reads that must undergo expensive computation, efficient heuristics, and hardware acceleration. 
While effective at reducing the computation overhead, all such approaches still require the costly movement of a large amount of data from storage to the rest of the system, which can significantly lower the end-to-end performance of read mapping in conventional and emerging genomics systems. We propose GenStore, the first in-storage processing system designed for genome sequence analysis that greatly reduces both data movement and computational overheads of genome sequence analysis by exploiting low-cost and accurate in-storage filters. GenStore leverages hardware/software co-design to address the challenges of in-storage processing, supporting reads with 1) different read lengths and error rates, and 2) different degrees of genetic variation. Through rigorous analysis of read mapping processes, we meticulously design low-cost hardware accelerators and data/computation flows inside a NAND flash-based SSD. Our evaluation using a wide range of real genomic datasets shows that GenStore, when implemented in three modern SSDs, significantly improves the read mapping performance of state-of-the-art software (hardware) baselines by 2.07-6.05$\times$ (1.52-3.32$\times$) for read sets with high similarity to the reference genome and 1.45-33.63$\times$ (2.70-19.2$\times$) for read sets with low similarity to the reference genome. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.10400v2-abstract-full').style.display = 'none'; document.getElementById('2202.10400v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published at ASPLOS 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.03632">arXiv:2202.03632</a> <span> [<a href="https://arxiv.org/pdf/2202.03632">pdf</a>, <a href="https://arxiv.org/format/2202.03632">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.34133/research.0153">10.34133/research.0153 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> ECRECer: Enzyme Commission Number Recommendation and Benchmarking based on Multiagent Dual-core Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Shi%2C+Z">Zhenkun Shi</a>, <a href="/search/q-bio?searchtype=author&query=Yuan%2C+Q">Qianqian Yuan</a>, <a href="/search/q-bio?searchtype=author&query=Wang%2C+R">Ruoyu Wang</a>, <a href="/search/q-bio?searchtype=author&query=Li%2C+H">Hoaran Li</a>, <a href="/search/q-bio?searchtype=author&query=Liao%2C+X">Xiaoping Liao</a>, <a href="/search/q-bio?searchtype=author&query=Ma%2C+H">Hongwu Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2202.03632v1-abstract-short" style="display: inline;"> Enzyme Commission (EC) numbers, which associate a protein sequence with the biochemical reactions it catalyzes, are essential for the accurate understanding of enzyme functions and cellular metabolism. Many ab-initio computational approaches were proposed to predict EC numbers for given input sequences directly. However, the prediction performance (accuracy, recall, precision), usability, and effi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.03632v1-abstract-full').style.display = 'inline'; document.getElementById('2202.03632v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.03632v1-abstract-full" style="display: none;"> Enzyme Commission (EC) numbers, which associate a protein sequence with the biochemical reactions it catalyzes, are essential for the accurate understanding of enzyme functions and cellular metabolism. Many ab-initio computational approaches were proposed to predict EC numbers for given input sequences directly. However, the prediction performance (accuracy, recall, precision), usability, and efficiency of existing methods still have much room to be improved. Here, we report ECRECer, a cloud platform for accurately predicting EC numbers based on novel deep learning techniques. To build ECRECer, we evaluate different protein representation methods and adopt a protein language model for protein sequence embedding. After embedding, we propose a multi-agent hierarchy deep learning-based framework to learn the proposed tasks in a multi-task manner. Specifically, we used an extreme multi-label classifier to perform the EC prediction and employed a greedy strategy to integrate and fine-tune the final model. 
Comparative analyses against four representative methods demonstrate that ECRECer delivers the highest performance, which improves accuracy and F1 score by 70% and 20% over the state-of-the-art, respectively. With ECRECer, we can annotate numerous enzymes in the Swiss-Prot database with incomplete EC numbers to their full fourth level. Take UniProt protein "A0A0U5GJ41" as an example (1.14.-.-), ECRECer annotated it with "1.14.11.38", which is supported by further protein structure analysis based on AlphaFold2. Finally, we established a webserver (https://ecrecer.biodesign.ac.cn) and provided an offline bundle to improve usability. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.03632v1-abstract-full').style.display = 'none'; document.getElementById('2202.03632v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 14 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Report number:</span> research.0153 <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> I.2.6 </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Research. 
2023:6;0153 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.07531">arXiv:2110.07531</a> <span> [<a href="https://arxiv.org/pdf/2110.07531">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biological Physics">physics.bio-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Deep learning models for predicting RNA degradation via dual crowdsourcing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Wayment-Steele%2C+H+K">Hannah K. Wayment-Steele</a>, <a href="/search/q-bio?searchtype=author&query=Kladwang%2C+W">Wipapat Kladwang</a>, <a href="/search/q-bio?searchtype=author&query=Watkins%2C+A+M">Andrew M. Watkins</a>, <a href="/search/q-bio?searchtype=author&query=Kim%2C+D+S">Do Soon Kim</a>, <a href="/search/q-bio?searchtype=author&query=Tunguz%2C+B">Bojan Tunguz</a>, <a href="/search/q-bio?searchtype=author&query=Reade%2C+W">Walter Reade</a>, <a href="/search/q-bio?searchtype=author&query=Demkin%2C+M">Maggie Demkin</a>, <a href="/search/q-bio?searchtype=author&query=Romano%2C+J">Jonathan Romano</a>, <a href="/search/q-bio?searchtype=author&query=Wellington-Oguri%2C+R">Roger Wellington-Oguri</a>, <a href="/search/q-bio?searchtype=author&query=Nicol%2C+J+J">John J. 
Nicol</a>, <a href="/search/q-bio?searchtype=author&query=Gao%2C+J">Jiayang Gao</a>, <a href="/search/q-bio?searchtype=author&query=Onodera%2C+K">Kazuki Onodera</a>, <a href="/search/q-bio?searchtype=author&query=Fujikawa%2C+K">Kazuki Fujikawa</a>, <a href="/search/q-bio?searchtype=author&query=Mao%2C+H">Hanfei Mao</a>, <a href="/search/q-bio?searchtype=author&query=Vandewiele%2C+G">Gilles Vandewiele</a>, <a href="/search/q-bio?searchtype=author&query=Tinti%2C+M">Michele Tinti</a>, <a href="/search/q-bio?searchtype=author&query=Steenwinckel%2C+B">Bram Steenwinckel</a>, <a href="/search/q-bio?searchtype=author&query=Ito%2C+T">Takuya Ito</a>, <a href="/search/q-bio?searchtype=author&query=Noumi%2C+T">Taiga Noumi</a>, <a href="/search/q-bio?searchtype=author&query=He%2C+S">Shujun He</a>, <a href="/search/q-bio?searchtype=author&query=Ishi%2C+K">Keiichiro Ishi</a>, <a href="/search/q-bio?searchtype=author&query=Lee%2C+Y">Youhan Lee</a>, <a href="/search/q-bio?searchtype=author&query=%C3%96zt%C3%BCrk%2C+F">Fatih Öztürk</a>, <a href="/search/q-bio?searchtype=author&query=Chiu%2C+A">Anthony Chiu</a>, <a href="/search/q-bio?searchtype=author&query=%C3%96zt%C3%BCrk%2C+E">Emin Öztürk</a> , et al. (4 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.07531v2-abstract-short" style="display: inline;"> Messenger RNA-based medicines hold immense potential, as evidenced by their rapid deployment as COVID-19 vaccines. However, worldwide distribution of mRNA molecules has been limited by their thermostability, which is fundamentally limited by the intrinsic instability of RNA molecules to a chemical degradation reaction called in-line hydrolysis. 
Predicting the degradation of an RNA molecule is a ke… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.07531v2-abstract-full').style.display = 'inline'; document.getElementById('2110.07531v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.07531v2-abstract-full" style="display: none;"> Messenger RNA-based medicines hold immense potential, as evidenced by their rapid deployment as COVID-19 vaccines. However, worldwide distribution of mRNA molecules has been limited by their thermostability, which is fundamentally limited by the intrinsic instability of RNA molecules to a chemical degradation reaction called in-line hydrolysis. Predicting the degradation of an RNA molecule is a key task in designing more stable RNA-based therapeutics. Here, we describe a crowdsourced machine learning competition ("Stanford OpenVaccine") on Kaggle, involving single-nucleotide resolution measurements on 6043 102-130-nucleotide diverse RNA constructs that were themselves solicited through crowdsourcing on the RNA design platform Eterna. The entire experiment was completed in less than 6 months, and 41% of nucleotide-level predictions from the winning model were within experimental error of the ground truth measurement. Furthermore, these models generalized to blindly predicting orthogonal degradation data on much longer mRNA molecules (504-1588 nucleotides) with improved accuracy compared to previously published models. Top teams integrated natural language processing architectures and data augmentation techniques with predictions from previous dynamic programming models for RNA secondary structure. These results indicate that such models are capable of representing in-line hydrolysis with excellent accuracy, supporting their use for designing stabilized messenger RNAs. 
The integration of two crowdsourcing platforms, one for data set creation and another for machine learning, may be fruitful for other urgent problems that demand scientific discovery on rapid timescales. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.07531v2-abstract-full').style.display = 'none'; document.getElementById('2110.07531v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.02843">arXiv:2103.02843</a> <span> [<a href="https://arxiv.org/pdf/2103.02843">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biological Physics">physics.bio-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1098/rsfs.2021.0018">10.1098/rsfs.2021.0018 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> 
Pandemic Drugs at Pandemic Speed: Infrastructure for Accelerating COVID-19 Drug Discovery with Hybrid Machine Learning- and Physics-based Simulations on High Performance Computers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Bhati%2C+A+P">Agastya P. Bhati</a>, <a href="/search/q-bio?searchtype=author&query=Wan%2C+S">Shunzhou Wan</a>, <a href="/search/q-bio?searchtype=author&query=Alf%C3%A8%2C+D">Dario Alfè</a>, <a href="/search/q-bio?searchtype=author&query=Clyde%2C+A+R">Austin R. Clyde</a>, <a href="/search/q-bio?searchtype=author&query=Bode%2C+M">Mathis Bode</a>, <a href="/search/q-bio?searchtype=author&query=Tan%2C+L">Li Tan</a>, <a href="/search/q-bio?searchtype=author&query=Titov%2C+M">Mikhail Titov</a>, <a href="/search/q-bio?searchtype=author&query=Merzky%2C+A">Andre Merzky</a>, <a href="/search/q-bio?searchtype=author&query=Turilli%2C+M">Matteo Turilli</a>, <a href="/search/q-bio?searchtype=author&query=Jha%2C+S">Shantenu Jha</a>, <a href="/search/q-bio?searchtype=author&query=Highfield%2C+R+R">Roger R.
Highfield</a>, <a href="/search/q-bio?searchtype=author&query=Rocchia%2C+W">Walter Rocchia</a>, <a href="/search/q-bio?searchtype=author&query=Scafuri%2C+N">Nicola Scafuri</a>, <a href="/search/q-bio?searchtype=author&query=Succi%2C+S">Sauro Succi</a>, <a href="/search/q-bio?searchtype=author&query=Kranzlm%C3%BCller%2C+D">Dieter Kranzlmüller</a>, <a href="/search/q-bio?searchtype=author&query=Mathias%2C+G">Gerald Mathias</a>, <a href="/search/q-bio?searchtype=author&query=Wifling%2C+D">David Wifling</a>, <a href="/search/q-bio?searchtype=author&query=Donon%2C+Y">Yann Donon</a>, <a href="/search/q-bio?searchtype=author&query=Di+Meglio%2C+A">Alberto Di Meglio</a>, <a href="/search/q-bio?searchtype=author&query=Vallecorsa%2C+S">Sofia Vallecorsa</a>, <a href="/search/q-bio?searchtype=author&query=Ma%2C+H">Heng Ma</a>, <a href="/search/q-bio?searchtype=author&query=Trifan%2C+A">Anda Trifan</a>, <a href="/search/q-bio?searchtype=author&query=Ramanathan%2C+A">Arvind Ramanathan</a>, <a href="/search/q-bio?searchtype=author&query=Brettin%2C+T">Tom Brettin</a>, <a href="/search/q-bio?searchtype=author&query=Partin%2C+A">Alexander Partin</a> , et al. (4 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.02843v2-abstract-short" style="display: inline;"> The race to meet the challenges of the global pandemic has served as a reminder that the existing drug discovery process is expensive, inefficient and slow. There is a major bottleneck screening the vast number of potential small molecules to shortlist lead compounds for antiviral drug development.
New opportunities to accelerate drug discovery lie at the interface between machine learning methods… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.02843v2-abstract-full').style.display = 'inline'; document.getElementById('2103.02843v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.02843v2-abstract-full" style="display: none;"> The race to meet the challenges of the global pandemic has served as a reminder that the existing drug discovery process is expensive, inefficient and slow. There is a major bottleneck screening the vast number of potential small molecules to shortlist lead compounds for antiviral drug development. New opportunities to accelerate drug discovery lie at the interface between machine learning methods, in this case developed for linear accelerators, and physics-based methods. The two in silico methods, each have their own advantages and limitations which, interestingly, complement each other. Here, we present an innovative infrastructural development that combines both approaches to accelerate drug discovery. The scale of the potential resulting workflow is such that it is dependent on supercomputing to achieve extremely high throughput. We have demonstrated the viability of this workflow for the study of inhibitors for four COVID-19 target proteins and our ability to perform the required large-scale calculations to identify lead antiviral compounds through repurposing on a variety of supercomputers. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.02843v2-abstract-full').style.display = 'none'; document.getElementById('2103.02843v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Interface Focus. 2021. 11 (6): 20210018 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2012.00885">arXiv:2012.00885</a> <span> [<a href="https://arxiv.org/pdf/2012.00885">pdf</a>, <a href="https://arxiv.org/format/2012.00885">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Artificial intelligence techniques for integrative structural biology of intrinsically disordered proteins </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Ramanathan%2C+A">Arvind Ramanathan</a>, <a href="/search/q-bio?searchtype=author&query=Ma%2C+H">Heng Ma</a>, <a href="/search/q-bio?searchtype=author&query=Parvatikar%2C+A">Akash Parvatikar</a>, <a href="/search/q-bio?searchtype=author&query=Chennubhotla%2C+C+S">Chakra S. 
Chennubhotla</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2012.00885v1-abstract-short" style="display: inline;"> We outline recent developments in artificial intelligence (AI) and machine learning (ML) techniques for integrative structural biology of intrinsically disordered proteins (IDP) ensembles. IDPs challenge the traditional protein structure-function paradigm by adapting their conformations in response to specific binding partners leading them to mediate diverse, and often complex cellular functions s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2012.00885v1-abstract-full').style.display = 'inline'; document.getElementById('2012.00885v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2012.00885v1-abstract-full" style="display: none;"> We outline recent developments in artificial intelligence (AI) and machine learning (ML) techniques for integrative structural biology of intrinsically disordered proteins (IDP) ensembles. IDPs challenge the traditional protein structure-function paradigm by adapting their conformations in response to specific binding partners leading them to mediate diverse, and often complex cellular functions such as biological signaling, self organization and compartmentalization. Obtaining mechanistic insights into their function can therefore be challenging for traditional structural determination techniques. Often, scientists have to rely on piecemeal evidence drawn from diverse experimental techniques to characterize their functional mechanisms. Multiscale simulations can help bridge critical knowledge gaps about IDP structure function relationships - however, these techniques also face challenges in resolving emergent phenomena within IDP conformational ensembles. 
We posit that scalable statistical inference techniques can effectively integrate information gleaned from multiple experimental techniques as well as from simulations, thus providing access to atomistic details of these emergent phenomena. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2012.00885v1-abstract-full').style.display = 'none'; document.getElementById('2012.00885v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 December, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 2 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.06574">arXiv:2010.06574</a> <span> [<a href="https://arxiv.org/pdf/2010.06574">pdf</a>, <a href="https://arxiv.org/format/2010.06574">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> IMPECCABLE: Integrated Modeling PipelinE for COVID Cure by Assessing Better LEads </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Saadi%2C+A+A">Aymen Al Saadi</a>, <a href="/search/q-bio?searchtype=author&query=Alfe%2C+D">Dario Alfe</a>, <a href="/search/q-bio?searchtype=author&query=Babuji%2C+Y">Yadu 
Babuji</a>, <a href="/search/q-bio?searchtype=author&query=Bhati%2C+A">Agastya Bhati</a>, <a href="/search/q-bio?searchtype=author&query=Blaiszik%2C+B">Ben Blaiszik</a>, <a href="/search/q-bio?searchtype=author&query=Brettin%2C+T">Thomas Brettin</a>, <a href="/search/q-bio?searchtype=author&query=Chard%2C+K">Kyle Chard</a>, <a href="/search/q-bio?searchtype=author&query=Chard%2C+R">Ryan Chard</a>, <a href="/search/q-bio?searchtype=author&query=Coveney%2C+P">Peter Coveney</a>, <a href="/search/q-bio?searchtype=author&query=Trifan%2C+A">Anda Trifan</a>, <a href="/search/q-bio?searchtype=author&query=Brace%2C+A">Alex Brace</a>, <a href="/search/q-bio?searchtype=author&query=Clyde%2C+A">Austin Clyde</a>, <a href="/search/q-bio?searchtype=author&query=Foster%2C+I">Ian Foster</a>, <a href="/search/q-bio?searchtype=author&query=Gibbs%2C+T">Tom Gibbs</a>, <a href="/search/q-bio?searchtype=author&query=Jha%2C+S">Shantenu Jha</a>, <a href="/search/q-bio?searchtype=author&query=Keipert%2C+K">Kristopher Keipert</a>, <a href="/search/q-bio?searchtype=author&query=Kurth%2C+T">Thorsten Kurth</a>, <a href="/search/q-bio?searchtype=author&query=Kranzlm%C3%BCller%2C+D">Dieter Kranzlmüller</a>, <a href="/search/q-bio?searchtype=author&query=Lee%2C+H">Hyungro Lee</a>, <a href="/search/q-bio?searchtype=author&query=Li%2C+Z">Zhuozhao Li</a>, <a href="/search/q-bio?searchtype=author&query=Ma%2C+H">Heng Ma</a>, <a href="/search/q-bio?searchtype=author&query=Merzky%2C+A">Andre Merzky</a>, <a href="/search/q-bio?searchtype=author&query=Mathias%2C+G">Gerald Mathias</a>, <a href="/search/q-bio?searchtype=author&query=Partin%2C+A">Alexander Partin</a>, <a href="/search/q-bio?searchtype=author&query=Yin%2C+J">Junqi Yin</a> , et al.
(11 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.06574v1-abstract-short" style="display: inline;"> The drug discovery process currently employed in the pharmaceutical industry typically requires about 10 years and $2-3 billion to deliver one new drug. This is both too expensive and too slow, especially in emergencies like the COVID-19 pandemic. In silico methodologies need to be improved to better select lead compounds that can proceed to later stages of the drug discovery protocol accelerating… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.06574v1-abstract-full').style.display = 'inline'; document.getElementById('2010.06574v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.06574v1-abstract-full" style="display: none;"> The drug discovery process currently employed in the pharmaceutical industry typically requires about 10 years and $2-3 billion to deliver one new drug. This is both too expensive and too slow, especially in emergencies like the COVID-19 pandemic. In silico methodologies need to be improved to better select lead compounds that can proceed to later stages of the drug discovery protocol accelerating the entire process. No single methodological approach can achieve the necessary accuracy with required efficiency. Here we describe multiple algorithmic innovations to overcome this fundamental limitation, development and deployment of computational infrastructure at scale integrates multiple artificial intelligence and simulation-based approaches. Three measures of performance are:(i) throughput, the number of ligands per unit time; (ii) scientific performance, the number of effective ligands sampled per unit time and (iii) peak performance, in flop/s.
The capabilities outlined here have been used in production for several months as the workhorse of the computational infrastructure to support the capabilities of the US-DOE National Virtual Biotechnology Laboratory in combination with resources from the EU Centre of Excellence in Computational Biomedicine. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.06574v1-abstract-full').style.display = 'none'; document.getElementById('2010.06574v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2005.13607">arXiv:2005.13607</a> <span> [<a href="https://arxiv.org/pdf/2005.13607">pdf</a>, <a href="https://arxiv.org/format/2005.13607">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Multi-View Graph Neural Networks for Molecular Property Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Ma%2C+H">Hehuan Ma</a>, <a href="/search/q-bio?searchtype=author&query=Bian%2C+Y">Yatao Bian</a>, <a href="/search/q-bio?searchtype=author&query=Rong%2C+Y">Yu Rong</a>, <a href="/search/q-bio?searchtype=author&query=Huang%2C+W">Wenbing Huang</a>, <a href="/search/q-bio?searchtype=author&query=Xu%2C+T">Tingyang Xu</a>, <a 
href="/search/q-bio?searchtype=author&query=Xie%2C+W">Weiyang Xie</a>, <a href="/search/q-bio?searchtype=author&query=Ye%2C+G">Geyan Ye</a>, <a href="/search/q-bio?searchtype=author&query=Huang%2C+J">Junzhou Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2005.13607v3-abstract-short" style="display: inline;"> The crux of molecular property prediction is to generate meaningful representations of the molecules. One promising route is to exploit the molecular graph structure through Graph Neural Networks (GNNs). It is well known that both atoms and bonds significantly affect the chemical properties of a molecule, so an expressive model shall be able to exploit both node (atom) and edge (bond) information… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.13607v3-abstract-full').style.display = 'inline'; document.getElementById('2005.13607v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2005.13607v3-abstract-full" style="display: none;"> The crux of molecular property prediction is to generate meaningful representations of the molecules. One promising route is to exploit the molecular graph structure through Graph Neural Networks (GNNs). It is well known that both atoms and bonds significantly affect the chemical properties of a molecule, so an expressive model shall be able to exploit both node (atom) and edge (bond) information simultaneously. Guided by this observation, we present Multi-View Graph Neural Network (MV-GNN), a multi-view message passing architecture to enable more accurate predictions of molecular properties. In MV-GNN, we introduce a shared self-attentive readout component and disagreement loss to stabilize the training process. This readout component also renders the whole architecture interpretable. 
We further boost the expressive power of MV-GNN by proposing a cross-dependent message passing scheme that enhances information communication of the two views, which results in the MV-GNN^cross variant. Lastly, we theoretically justify the expressiveness of the two proposed models in terms of distinguishing non-isomorphism graphs. Extensive experiments demonstrate that MV-GNN models achieve remarkably superior performance over the state-of-the-art models on a variety of challenging benchmarks. Meanwhile, visualization results of the node importance are consistent with prior knowledge, which confirms the interpretability power of MV-GNN models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.13607v3-abstract-full').style.display = 'none'; document.getElementById('2005.13607v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 June, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 May, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2020. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2001.02744">arXiv:2001.02744</a> <span> [<a href="https://arxiv.org/pdf/2001.02744">pdf</a>, <a href="https://arxiv.org/format/2001.02744">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Dynamical Systems">math.DS</span> </div> </div> <p class="title is-5 mathjax"> Contribution of high risk groups' unmet needs may be underestimated in epidemic models without risk turnover: a mechanistic modelling analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Knight%2C+J">Jesse Knight</a>, <a href="/search/q-bio?searchtype=author&query=Baral%2C+S+D">Stefan D. Baral</a>, <a href="/search/q-bio?searchtype=author&query=Schwartz%2C+S">Sheree Schwartz</a>, <a href="/search/q-bio?searchtype=author&query=Wang%2C+L">Linwei Wang</a>, <a href="/search/q-bio?searchtype=author&query=Ma%2C+H">Huiting Ma</a>, <a href="/search/q-bio?searchtype=author&query=Young%2C+K">Katherine Young</a>, <a href="/search/q-bio?searchtype=author&query=Hausler%2C+H">Harry Hausler</a>, <a href="/search/q-bio?searchtype=author&query=Mishra%2C+S">Sharmistha Mishra</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2001.02744v1-abstract-short" style="display: inline;"> BACKGROUND. Epidemic models of STIs are often used to characterize the contribution of risk groups to overall transmission by projecting the transmission population attributable fraction (tPAF) of unmet prevention and treatment needs within risk groups. 
However, evidence suggests that STI risk is dynamic over an individual's sexual life course, which manifests as turnover between risk groups. We s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2001.02744v1-abstract-full').style.display = 'inline'; document.getElementById('2001.02744v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2001.02744v1-abstract-full" style="display: none;"> BACKGROUND. Epidemic models of STIs are often used to characterize the contribution of risk groups to overall transmission by projecting the transmission population attributable fraction (tPAF) of unmet prevention and treatment needs within risk groups. However, evidence suggests that STI risk is dynamic over an individual's sexual life course, which manifests as turnover between risk groups. We sought to examine the mechanisms by which turnover influences modelled projections of the tPAF of high risk groups. METHODS. We developed a unifying, data-guided framework to simulate risk group turnover in deterministic, compartmental transmission models. We applied the framework to an illustrative model of an STI and examined the mechanisms by which risk group turnover influenced equilibrium prevalence across risk groups. We then fit a model with and without turnover to the same risk-stratified STI prevalence targets and compared the inferred level of risk heterogeneity and tPAF of the highest risk group projected by the two models. RESULTS. The influence of turnover on group-specific prevalence was mediated by three main phenomena: movement of infectious individuals between risk groups; changes to herd immunity; and changes in discordant partnerships. Faster turnover led to a smaller ratio of STI prevalence between the highest and lowest risk groups. 
Compared to the fitted model without turnover, the fitted model with turnover inferred greater risk heterogeneity and consistently projected a larger tPAF of the highest risk group over time. IMPLICATIONS. If turnover is not captured in epidemic models, the projected contribution of high risk groups, and thus, the potential impact of prioritizing interventions to address their needs, could be underestimated. To aid the next generation of tPAF models, data collection efforts to parameterize risk group turnover should be prioritized. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2001.02744v1-abstract-full').style.display = 'none'; document.getElementById('2001.02744v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 January, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">33 pages, 20 figures, code: https://github.com/mishra-lab/turnover</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1912.04949">arXiv:1912.04949</a> <span> [<a href="https://arxiv.org/pdf/1912.04949">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Other Quantitative Biology">q-bio.OT</span> </div> </div> <p class="title is-5 mathjax"> Venue-based HIV testing at sex work hotspots to reach adolescent girls and young women living with HIV: a cross-sectional study in Mombasa, Kenya </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Ma%2C+H">Huiting Ma</a>, <a 
href="/search/q-bio?searchtype=author&query=Wang%2C+L">Linwei Wang</a>, <a href="/search/q-bio?searchtype=author&query=Gichangi%2C+P">Peter Gichangi</a>, <a href="/search/q-bio?searchtype=author&query=Mochache%2C+V">Vernon Mochache</a>, <a href="/search/q-bio?searchtype=author&query=Manguro%2C+G">Griffins Manguro</a>, <a href="/search/q-bio?searchtype=author&query=Musyoki%2C+H+K">Helgar K Musyoki</a>, <a href="/search/q-bio?searchtype=author&query=Bhattacharjee%2C+P">Parinita Bhattacharjee</a>, <a href="/search/q-bio?searchtype=author&query=Cholette%2C+F">François Cholette</a>, <a href="/search/q-bio?searchtype=author&query=Sandstrom%2C+P">Paul Sandstrom</a>, <a href="/search/q-bio?searchtype=author&query=Becker%2C+M+L">Marissa L Becker</a>, <a href="/search/q-bio?searchtype=author&query=Mishra%2C+S">Sharmistha Mishra</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1912.04949v1-abstract-short" style="display: inline;"> Background: We estimated the potential number of newly diagnosed HIV infections among adolescent girls and young women (AGYW) using a venue-based approach to HIV testing at sex work hotspots. Methods: We used hotspot enumeration and cross-sectional bio-behavioural survey data from the 2015 Transitions Study of AGYW aged 14-24 years who frequented hotspots in Mombasa, Kenya. We compared the HIV c… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1912.04949v1-abstract-full').style.display = 'inline'; document.getElementById('1912.04949v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1912.04949v1-abstract-full" style="display: none;"> Background: We estimated the potential number of newly diagnosed HIV infections among adolescent girls and young women (AGYW) using a venue-based approach to HIV testing at sex work hotspots.
Methods: We used hotspot enumeration and cross-sectional bio-behavioural survey data from the 2015 Transitions Study of AGYW aged 14-24 years who frequented hotspots in Mombasa, Kenya. We compared the HIV cascade among AGYW who sell sex (YSW, N=408) versus those who do not (NSW, N=891); and triangulated the potential (100% test acceptance and accuracy) and feasible (accounting for test acceptance and sensitivity) number of AGYW that could be newly diagnosed via hotspot-based HIV rapid testing in Mombasa. We identified the profile of AGYW recently tested for HIV (in the past year) using multivariable logistic regression. Results: N=37/365 (10.1%) YSW and N=30/828 (3.6%) NSW were living with HIV, of whom 27.0% (N=10/37) and 30.0% (N=9/30) were diagnosed and aware (p=0.79). Rapid test acceptance was 89.3% and sensitivity was 80.4%. Hotspot enumeration estimated 15,635 (range: 12,172-19,097) AGYW in hotspots in Mombasa. The potential and feasible number of new diagnosis were 627 (310-1,081), and 450 (223-776), respectively. Thus, hotspot-based testing could feasibly reduce the undiagnosed fraction from 71.6% to 20.2%. The profile of AGYW who recently tested was similar among YSW and NSW. YSW were two-fold more likely to report a recent HIV test after adjusting for other determinants [odds ratio (95% CI): 2.1 (1.6-3.1)]. Conclusion: Reaching AGYW via hotspot-based HIV testing could fill gaps left by traditional, clinic-based HIV prevention and testing services. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1912.04949v1-abstract-full').style.display = 'none'; document.getElementById('1912.04949v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 December, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2019. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 6 figures, 7 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1908.00496">arXiv:1908.00496</a> <span> [<a href="https://arxiv.org/pdf/1908.00496">pdf</a>, <a href="https://arxiv.org/format/1908.00496">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Deep Generative Model Driven Protein Folding Simulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Ma%2C+H">Heng Ma</a>, <a href="/search/q-bio?searchtype=author&query=Bhowmik%2C+D">Debsindhu Bhowmik</a>, <a href="/search/q-bio?searchtype=author&query=Lee%2C+H">Hyungro Lee</a>, <a href="/search/q-bio?searchtype=author&query=Turilli%2C+M">Matteo Turilli</a>, <a href="/search/q-bio?searchtype=author&query=Young%2C+M+T">Michael T. Young</a>, <a href="/search/q-bio?searchtype=author&query=Jha%2C+S">Shantenu Jha</a>, <a href="/search/q-bio?searchtype=author&query=Ramanathan%2C+A">Arvind Ramanathan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1908.00496v1-abstract-short" style="display: inline;"> Significant progress in computer hardware and software have enabled molecular dynamics (MD) simulations to model complex biological phenomena such as protein folding. However, enabling MD simulations to access biologically relevant timescales (e.g., beyond milliseconds) still remains challenging. 
These limitations include (1) quantifying which set of states have already been (sufficiently) sampled… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1908.00496v1-abstract-full').style.display = 'inline'; document.getElementById('1908.00496v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1908.00496v1-abstract-full" style="display: none;"> Significant progress in computer hardware and software have enabled molecular dynamics (MD) simulations to model complex biological phenomena such as protein folding. However, enabling MD simulations to access biologically relevant timescales (e.g., beyond milliseconds) still remains challenging. These limitations include (1) quantifying which set of states have already been (sufficiently) sampled in an ensemble of MD runs, and (2) identifying novel states from which simulations can be initiated to sample rare events (e.g., sampling folding events). With the recent success of deep learning and artificial intelligence techniques in analyzing large datasets, we posit that these techniques can also be used to adaptively guide MD simulations to model such complex biological phenomena. Leveraging our recently developed unsupervised deep learning technique to cluster protein folding trajectories into partially folded intermediates, we build an iterative workflow that enables our generative model to be coupled with all-atom MD simulations to fold small protein systems on emerging high performance computing platforms. We demonstrate our approach in folding Fs-peptide and the $ββα$ (BBA) fold, FSD-EY. Our adaptive workflow enables us to achieve an overall root-mean squared deviation (RMSD) to the native state of 1.6$~Å$ and 4.4~$Å$ respectively for Fs-peptide and FSD-EY. 
We also highlight some emerging challenges in the context of designing scalable workflows when data intensive deep learning techniques are coupled to compute intensive MD simulations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1908.00496v1-abstract-full').style.display = 'none'; document.getElementById('1908.00496v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 August, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">3 figures, 2 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1606.09348">arXiv:1606.09348</a> <span> [<a href="https://arxiv.org/pdf/1606.09348">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> </div> </div> <p class="title is-5 mathjax"> Phylogenomic Analyses of Large-scale Nuclear Genes Provide New Insights into the Evolutionary Relationships within the Rosids </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Zhao%2C+L">Lei Zhao</a>, <a href="/search/q-bio?searchtype=author&query=Li%2C+X">Xia Li</a>, <a href="/search/q-bio?searchtype=author&query=Zhang%2C+N">Ning Zhang</a>, <a href="/search/q-bio?searchtype=author&query=Zhang%2C+S">Shu-Dong Zhang</a>, <a href="/search/q-bio?searchtype=author&query=Yi%2C+T">Ting-Shuang Yi</a>, <a href="/search/q-bio?searchtype=author&query=Ma%2C+H">Hong Ma</a>, <a href="/search/q-bio?searchtype=author&query=Guo%2C+Z">Zhen-Hua Guo</a>, <a 
href="/search/q-bio?searchtype=author&query=Li%2C+D">De-Zhu Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1606.09348v1-abstract-short" style="display: inline;"> The Rosids is one of the largest groups of flowering plants, with 140 families and ~70,000 species. Previous phylogenetic studies of the rosids have primarily utilized organelle genes that likely differ in evolutionary histories from nuclear genes. To better understand the evolutionary history of rosids, it is necessary to investigate their phylogenetic relationships using nuclear genes. Here, we… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1606.09348v1-abstract-full').style.display = 'inline'; document.getElementById('1606.09348v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1606.09348v1-abstract-full" style="display: none;"> The Rosids is one of the largest groups of flowering plants, with 140 families and ~70,000 species. Previous phylogenetic studies of the rosids have primarily utilized organelle genes that likely differ in evolutionary histories from nuclear genes. To better understand the evolutionary history of rosids, it is necessary to investigate their phylogenetic relationships using nuclear genes. Here, we employed large-scale phylogenomic datasets composed of nuclear genes, including 891 clusters of putative orthologous genes. Combined with comprehensive taxon sampling covering 63 species representing 14 out of the 17 orders, we reconstructed the rosids phylogeny with coalescence and concatenation methods, yielding similar tree topologies from all datasets. However, these topologies did not agree on the placement of Zygophyllales. 
Through comprehensive analyses, we found that missing data and gene tree heterogeneity were potential factors that may mislead concatenation methods, in particular, large amounts of missing data under high gene tree heterogeneity. Our results provided new insights into the deep phylogenetic relationships of the rosids, and demonstrated that coalescence methods may effectively resolve the phylogenetic relationships of the rosids with missing data under high gene tree heterogeneity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1606.09348v1-abstract-full').style.display = 'none'; document.getElementById('1606.09348v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 June, 2016; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2016. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1409.2114">arXiv:1409.2114</a> <span> [<a href="https://arxiv.org/pdf/1409.2114">pdf</a>, <a href="https://arxiv.org/ps/1409.2114">ps</a>, <a href="https://arxiv.org/format/1409.2114">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Disordered Systems and Neural Networks">cond-mat.dis-nn</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Statistical Mechanics">cond-mat.stat-mech</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1103/PhysRevE.92.022801">10.1103/PhysRevE.92.022801 <i class="fa fa-external-link" 
aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Fluctuation-response Relation Unifies Dynamical Behaviors in Neural Fields </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Fung%2C+C+C+A">C. C. Alan Fung</a>, <a href="/search/q-bio?searchtype=author&query=Wong%2C+K+Y+M">K. Y. Michael Wong</a>, <a href="/search/q-bio?searchtype=author&query=Mao%2C+H">Hongzi Mao</a>, <a href="/search/q-bio?searchtype=author&query=Wu%2C+S">Si Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1409.2114v2-abstract-short" style="display: inline;"> Anticipation is a strategy used by neural fields to compensate for transmission and processing delays during the tracking of dynamical information, and can be achieved by slow, localized, inhibitory feedback mechanisms such as short-term synaptic depression, spike-frequency adaptation, or inhibitory feedback from other layers. Based on the translational symmetry of the mobile network states, we de… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1409.2114v2-abstract-full').style.display = 'inline'; document.getElementById('1409.2114v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1409.2114v2-abstract-full" style="display: none;"> Anticipation is a strategy used by neural fields to compensate for transmission and processing delays during the tracking of dynamical information, and can be achieved by slow, localized, inhibitory feedback mechanisms such as short-term synaptic depression, spike-frequency adaptation, or inhibitory feedback from other layers. 
Based on the translational symmetry of the mobile network states, we derive generic fluctuation-response relations, providing unified predictions that link their tracking behaviors in the presence of external stimuli to the intrinsic dynamics of the neural fields in their absence. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1409.2114v2-abstract-full').style.display = 'none'; document.getElementById('1409.2114v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 July, 2015; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 September, 2014; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2014. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 9 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Phys. Rev. 
E 92, 022801 (2015) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1206.3340">arXiv:1206.3340</a> <span> [<a href="https://arxiv.org/pdf/1206.3340">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Molecular Networks">q-bio.MN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Extraction of Deep Phylogenetic Signal and Improved Resolution of Evolutionary Events within the recA/RAD51 Phylogeny </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Chintapalli%2C+S+V">Sree V. Chintapalli</a>, <a href="/search/q-bio?searchtype=author&query=Bhardwaj%2C+G">Gaurav Bhardwaj</a>, <a href="/search/q-bio?searchtype=author&query=Babu%2C+J">Jagadish Babu</a>, <a href="/search/q-bio?searchtype=author&query=Hadjiyianni%2C+L">Loukia Hadjiyianni</a>, <a href="/search/q-bio?searchtype=author&query=Hong%2C+Y">Yoojin Hong</a>, <a href="/search/q-bio?searchtype=author&query=Zhang%2C+Z">Zhenhai Zhang</a>, <a href="/search/q-bio?searchtype=author&query=Zhou%2C+X">Xiaofan Zhou</a>, <a href="/search/q-bio?searchtype=author&query=Ma%2C+H">Hong Ma</a>, <a href="/search/q-bio?searchtype=author&query=Anishkin%2C+A">Andriy Anishkin</a>, <a href="/search/q-bio?searchtype=author&query=van+Rossum%2C+D+B">Damian B. van Rossum</a>, <a href="/search/q-bio?searchtype=author&query=Patterson%2C+R+L">Randen L. 
Patterson</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1206.3340v1-abstract-short" style="display: inline;"> The recA/RAD51 gene family encodes a diverse set of recombinase proteins that effect homologous recombination, DNA-repair, and genome stability. The recA gene family is expressed in almost all species of Eubacteria, Archaea, and Eukaryotes, and even in some viruses. To date, efforts to resolve the deep evolutionary origins of this ancient protein family have been hindered, in part, by the high seq… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1206.3340v1-abstract-full').style.display = 'inline'; document.getElementById('1206.3340v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1206.3340v1-abstract-full" style="display: none;"> The recA/RAD51 gene family encodes a diverse set of recombinase proteins that effect homologous recombination, DNA-repair, and genome stability. The recA gene family is expressed in almost all species of Eubacteria, Archaea, and Eukaryotes, and even in some viruses. To date, efforts to resolve the deep evolutionary origins of this ancient protein family have been hindered, in part, by the high sequence divergence between families (i.e. ~30% identity between paralogous groups). Through (i) large taxon sampling, (ii) the use of a phylogenetic algorithm designed for measuring highly divergent paralogs, and (iii) novel Evolutionary Spatial Dynamics simulation and analytical tools, we obtained a robust, parsimonious and more refined phylogenetic history of the recA/RAD51 superfamily. 
Taken together, our model for the evolution of recA/RAD51 family provides a better understanding of ancient origin of recA proteins and multiple events leading to the diversification of recA homologs in eukaryotes, including the discovery of additional RAD51 sub-families. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1206.3340v1-abstract-full').style.display = 'none'; document.getElementById('1206.3340v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 June, 2012; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2012. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">21 pages, 11 figures, 1 table</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/0910.0928">arXiv:0910.0928</a> <span> [<a href="https://arxiv.org/pdf/0910.0928">pdf</a>, <a href="https://arxiv.org/format/0910.0928">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.4204/EPTCS.6.3">10.4204/EPTCS.6.3 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> BioDiVinE: A 
Framework for Parallel Analysis of Biological Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Barnat%2C+J">Jiří Barnat</a>, <a href="/search/q-bio?searchtype=author&query=Brim%2C+L">Luboš Brim</a>, <a href="/search/q-bio?searchtype=author&query=%C4%8Cern%C3%A1%2C+I">Ivana Černá</a>, <a href="/search/q-bio?searchtype=author&query=Dra%C5%BEan%2C+S">Sven Dražan</a>, <a href="/search/q-bio?searchtype=author&query=Fabrikov%C3%A1%2C+J">Jana Fabriková</a>, <a href="/search/q-bio?searchtype=author&query=L%C3%A1n%C3%ADk%2C+J">Jan Láník</a>, <a href="/search/q-bio?searchtype=author&query=%C5%A0afr%C3%A1nek%2C+D">David Šafránek</a>, <a href="/search/q-bio?searchtype=author&query=Ma%2C+H">Hongwu Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="0910.0928v1-abstract-short" style="display: inline;"> In this paper a novel tool BioDiVinEfor parallel analysis of biological models is presented. The tool allows analysis of biological models specified in terms of a set of chemical reactions. Chemical reactions are transformed into a system of multi-affine differential equations. BioDiVinE employs techniques for finite discrete abstraction of the continuous state space. At that level, parallel ana… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('0910.0928v1-abstract-full').style.display = 'inline'; document.getElementById('0910.0928v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="0910.0928v1-abstract-full" style="display: none;"> In this paper a novel tool BioDiVinEfor parallel analysis of biological models is presented. The tool allows analysis of biological models specified in terms of a set of chemical reactions. 
Chemical reactions are transformed into a system of multi-affine differential equations. BioDiVinE employs techniques for finite discrete abstraction of the continuous state space. At that level, parallel analysis algorithms based on model checking are provided. In the paper, the key tool features are described and their application is demonstrated by means of a case study. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('0910.0928v1-abstract-full').style.display = 'none'; document.getElementById('0910.0928v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 October, 2009; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2009. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> EPTCS 6, 2009, pp. 31-45 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/q-bio/0410023">arXiv:q-bio/0410023</a> <span> [<a href="https://arxiv.org/pdf/q-bio/0410023">pdf</a>, <a href="https://arxiv.org/ps/q-bio/0410023">ps</a>, <a href="https://arxiv.org/format/q-bio/0410023">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Molecular Networks">q-bio.MN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Statistical Mechanics">cond-mat.stat-mech</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> </div> </div> <p class="title is-5 mathjax"> Network of tRNA Gene Sequences </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Wei%2C+F">Fangping Wei</a>, <a href="/search/q-bio?searchtype=author&query=Li%2C+S">Sheng Li</a>, 
<a href="/search/q-bio?searchtype=author&query=Ma%2C+H">Hongru Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="q-bio/0410023v3-abstract-short" style="display: inline;"> We showed in this paper that similarity network can be used as an powerful tools to study the relationship of tRNA genes. We constructed a network of 3719 tRNA gene sequences using simplest alignment and studied its topology, degree distribution and clustering coefficient. It is found that the behavior of the network shift from fluctuated distribution to scale-free distribution when the similari… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('q-bio/0410023v3-abstract-full').style.display = 'inline'; document.getElementById('q-bio/0410023v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="q-bio/0410023v3-abstract-full" style="display: none;"> We showed in this paper that similarity network can be used as an powerful tools to study the relationship of tRNA genes. We constructed a network of 3719 tRNA gene sequences using simplest alignment and studied its topology, degree distribution and clustering coefficient. It is found that the behavior of the network shift from fluctuated distribution to scale-free distribution when the similarity degree of the tRNA gene sequences increase. tRNA gene sequences with the same anticodon identity are more self-organized than the tRNA gene sequences with different anticodon identities and form local clusters in the network. An interesting finding in our studied is some vertices of the local cluster have a high connection with other local clusters, the probable reason is given. Moreover, a network constructed by the same number of random tRNA sequences is used to make comparisons. 
The relationships between properties of the tRNA similarity network and the characters of tRNA evolutionary history are discussed. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('q-bio/0410023v3-abstract-full').style.display = 'none'; document.getElementById('q-bio/0410023v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 December, 2004; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 October, 2004; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2004. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Latex, 18 pages, 17 figures</span> </p> </li> </ol> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 
0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 
47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository