CINXE.COM
Search | arXiv e-print repository
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–33 of 33 results for author: <span class="mathjax">Alser, M</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/q-bio" aria-role="search"> Searching in archive <strong>q-bio</strong>. <a href="/search/?searchtype=author&query=Alser%2C+M">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Alser, M"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Alser%2C+M&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Alser, M"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.19113">arXiv:2406.19113</a> <span> [<a href="https://arxiv.org/pdf/2406.19113">pdf</a>, <a href="https://arxiv.org/format/2406.19113">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> </div> <p class="title is-5 mathjax"> MegIS: High-Performance, Energy-Efficient, and Low-Cost Metagenomic Analysis with In-Storage Processing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Ghiasi%2C+N+M">Nika Mansouri Ghiasi</a>, <a href="/search/q-bio?searchtype=author&query=Sadrosadati%2C+M">Mohammad Sadrosadati</a>, <a href="/search/q-bio?searchtype=author&query=Mustafa%2C+H">Harun Mustafa</a>, <a href="/search/q-bio?searchtype=author&query=Gollwitzer%2C+A">Arvid Gollwitzer</a>, <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Eudine%2C+J">Julien Eudine</a>, <a href="/search/q-bio?searchtype=author&query=Mao%2C+H">Haiyu Mao</a>, <a href="/search/q-bio?searchtype=author&query=Lindegger%2C+J">Jo毛l Lindegger</a>, <a href="/search/q-bio?searchtype=author&query=Cavlak%2C+M+B">Meryem Banu Cavlak</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Park%2C+J">Jisung Park</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.19113v1-abstract-short" style="display: inline;"> Metagenomics has led to significant advances in many fields. Metagenomic analysis commonly involves the key tasks of determining the species present in a sample and their relative abundances. These tasks require searching large metagenomic databases. Metagenomic analysis suffers from significant data movement overhead due to moving large amounts of low-reuse data from the storage system. In-storag… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19113v1-abstract-full').style.display = 'inline'; document.getElementById('2406.19113v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.19113v1-abstract-full" style="display: none;"> Metagenomics has led to significant advances in many fields. Metagenomic analysis commonly involves the key tasks of determining the species present in a sample and their relative abundances. These tasks require searching large metagenomic databases. Metagenomic analysis suffers from significant data movement overhead due to moving large amounts of low-reuse data from the storage system. In-storage processing can be a fundamental solution for reducing this overhead. However, designing an in-storage processing system for metagenomics is challenging because existing approaches to metagenomic analysis cannot be directly implemented in storage effectively due to the hardware limitations of modern SSDs. We propose MegIS, the first in-storage processing system designed to significantly reduce the data movement overhead of the end-to-end metagenomic analysis pipeline. MegIS is enabled by our lightweight design that effectively leverages and orchestrates processing inside and outside the storage system. We address in-storage processing challenges for metagenomics via specialized and efficient 1) task partitioning, 2) data/computation flow coordination, 3) storage technology-aware algorithmic optimizations, 4) data mapping, and 5) lightweight in-storage accelerators. MegIS's design is flexible, capable of supporting different types of metagenomic input datasets, and can be integrated into various metagenomic analysis pipelines. Our evaluation shows that MegIS outperforms the state-of-the-art performance- and accuracy-optimized software metagenomic tools by 2.7$\times$-37.2$\times$ and 6.9$\times$-100.2$\times$, respectively, while matching the accuracy of the accuracy-optimized tool. MegIS achieves 1.5$\times$-5.1$\times$ speedup compared to the state-of-the-art metagenomic hardware-accelerated (using processing-in-memory) tool, while achieving significantly higher accuracy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19113v1-abstract-full').style.display = 'none'; document.getElementById('2406.19113v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in ISCA 2024. arXiv admin note: substantial text overlap with arXiv:2311.12527</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.12527">arXiv:2311.12527</a> <span> [<a href="https://arxiv.org/pdf/2311.12527">pdf</a>, <a href="https://arxiv.org/format/2311.12527">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> MetaStore: High-Performance Metagenomic Analysis via In-Storage Computing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Ghiasi%2C+N+M">Nika Mansouri Ghiasi</a>, <a href="/search/q-bio?searchtype=author&query=Sadrosadati%2C+M">Mohammad Sadrosadati</a>, <a href="/search/q-bio?searchtype=author&query=Mustafa%2C+H">Harun Mustafa</a>, <a href="/search/q-bio?searchtype=author&query=Gollwitzer%2C+A">Arvid Gollwitzer</a>, <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Eudine%2C+J">Julien Eudine</a>, <a href="/search/q-bio?searchtype=author&query=Ma%2C+H">Haiyu Ma</a>, <a href="/search/q-bio?searchtype=author&query=Lindegger%2C+J">Jo毛l Lindegger</a>, <a href="/search/q-bio?searchtype=author&query=Cavlak%2C+M+B">Meryem Banu Cavlak</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Park%2C+J">Jisung Park</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.12527v1-abstract-short" style="display: inline;"> Metagenomics has led to significant advancements in many fields. Metagenomic analysis commonly involves the key tasks of determining the species present in a sample and their relative abundances. These tasks require searching large metagenomic databases containing information on different species' genomes. Metagenomic analysis suffers from significant data movement overhead due to moving large amo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.12527v1-abstract-full').style.display = 'inline'; document.getElementById('2311.12527v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.12527v1-abstract-full" style="display: none;"> Metagenomics has led to significant advancements in many fields. Metagenomic analysis commonly involves the key tasks of determining the species present in a sample and their relative abundances. These tasks require searching large metagenomic databases containing information on different species' genomes. Metagenomic analysis suffers from significant data movement overhead due to moving large amounts of low-reuse data from the storage system to the rest of the system. In-storage processing can be a fundamental solution for reducing data movement overhead. However, designing an in-storage processing system for metagenomics is challenging because none of the existing approaches can be directly implemented in storage effectively due to the hardware limitations of modern SSDs. We propose MetaStore, the first in-storage processing system designed to significantly reduce the data movement overhead of end-to-end metagenomic analysis. MetaStore is enabled by our lightweight and cooperative design that effectively leverages and orchestrates processing inside and outside the storage system. Through our detailed analysis of the end-to-end metagenomic analysis pipeline and careful hardware/software co-design, we address in-storage processing challenges for metagenomics via specialized and efficient 1) task partitioning, 2) data/computation flow coordination, 3) storage technology-aware algorithmic optimizations, 4) light-weight in-storage accelerators, and 5) data mapping. Our evaluation shows that MetaStore outperforms the state-of-the-art performance- and accuracy-optimized software metagenomic tools by 2.7-37.2$\times$ and 6.9-100.2$\times$, respectively, while matching the accuracy of the accuracy-optimized tool. MetaStore achieves 1.5-5.1$\times$ speedup compared to the state-of-the-art metagenomic hardware-accelerated tool, while achieving significantly higher accuracy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.12527v1-abstract-full').style.display = 'none'; document.getElementById('2311.12527v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.02029">arXiv:2311.02029</a> <span> [<a href="https://arxiv.org/pdf/2311.02029">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> MetaTrinity: Enabling Fast Metagenomic Classification via Seed Counting and Edit Distance Approximation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Gollwitzer%2C+A+E">Arvid E. Gollwitzer</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Bergtholdt%2C+J">Joel Bergtholdt</a>, <a href="/search/q-bio?searchtype=author&query=Lindegger%2C+J">Joel Lindegger</a>, <a href="/search/q-bio?searchtype=author&query=Rumpf%2C+M">Maximilian-David Rumpf</a>, <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Mangul%2C+S">Serghei Mangul</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.02029v3-abstract-short" style="display: inline;"> Metagenomics, the study of genome sequences of diverse organisms cohabiting in a shared environment, has experienced significant advancements across various medical and biological fields. Metagenomic analysis is crucial, for instance, in clinical applications such as infectious disease screening and the diagnosis and early detection of diseases such as cancer. A key task in metagenomics is to dete… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.02029v3-abstract-full').style.display = 'inline'; document.getElementById('2311.02029v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.02029v3-abstract-full" style="display: none;"> Metagenomics, the study of genome sequences of diverse organisms cohabiting in a shared environment, has experienced significant advancements across various medical and biological fields. Metagenomic analysis is crucial, for instance, in clinical applications such as infectious disease screening and the diagnosis and early detection of diseases such as cancer. A key task in metagenomics is to determine the species present in a sample and their relative abundances. Currently, the field is dominated by either alignment-based tools, which offer high accuracy but are computationally expensive, or alignment-free tools, which are fast but lack the needed accuracy for many applications. In response to this dichotomy, we introduce MetaTrinity, a tool based on heuristics, to achieve a fundamental improvement in accuracy-runtime tradeoff over existing methods. We benchmark MetaTrinity against two leading metagenomic classifiers, each representing different ends of the performance-accuracy spectrum. On one end, Kraken2, a tool optimized for performance, shows modest accuracy yet a rapid runtime. The other end of the spectrum is governed by Metalign, a tool optimized for accuracy. Our evaluations show that MetaTrinity achieves an accuracy comparable to Metalign while gaining a 4x speedup without any loss in accuracy. This directly equates to a fourfold improvement in runtime-accuracy tradeoff. Compared to Kraken2, MetaTrinity requires a 5x longer runtime yet delivers a 17x improvement in accuracy. This demonstrates a 3.4x enhancement in the accuracy-runtime tradeoff for MetaTrinity. This dual comparison positions MetaTrinity as a broadly applicable solution for metagenomic classification, combining advantages of both ends of the spectrum: speed and accuracy. MetaTrinity is publicly available at https://github.com/CMU-SAFARI/MetaTrinity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.02029v3-abstract-full').style.display = 'none'; document.getElementById('2311.02029v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.16908">arXiv:2310.16908</a> <span> [<a href="https://arxiv.org/pdf/2310.16908">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> SequenceLab: A Comprehensive Benchmark of Computational Methods for Comparing Genomic Sequences </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Rumpf%2C+M">Maximilian-David Rumpf</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Gollwitzer%2C+A+E">Arvid E. Gollwitzer</a>, <a href="/search/q-bio?searchtype=author&query=Lindegger%2C+J">Joel Lindegger</a>, <a href="/search/q-bio?searchtype=author&query=Almadhoun%2C+N">Nour Almadhoun</a>, <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Mangul%2C+S">Serghei Mangul</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.16908v4-abstract-short" style="display: inline;"> Computational complexity is a key limitation of genomic analyses. Thus, over the last 30 years, researchers have proposed numerous fast heuristic methods that provide computational relief. Comparing genomic sequences is one of the most fundamental computational steps in most genomic analyses. Due to its high computational complexity, optimized exact and heuristic algorithms are still being develop… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.16908v4-abstract-full').style.display = 'inline'; document.getElementById('2310.16908v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.16908v4-abstract-full" style="display: none;"> Computational complexity is a key limitation of genomic analyses. Thus, over the last 30 years, researchers have proposed numerous fast heuristic methods that provide computational relief. Comparing genomic sequences is one of the most fundamental computational steps in most genomic analyses. Due to its high computational complexity, optimized exact and heuristic algorithms are still being developed. We find that these methods are highly sensitive to the underlying data, its quality, and various hyperparameters. Despite their wide use, no in-depth analysis has been performed, potentially falsely discarding genetic sequences from further analysis and unnecessarily inflating computational costs. We provide the first analysis and benchmark of this heterogeneity. We deliver an actionable overview of the 11 most widely used state-of-the-art methods for comparing genomic sequences. We also inform readers about their advantages and downsides using thorough experimental evaluation and different real datasets from all major manufacturers (i.e., Illumina, ONT, and PacBio). SequenceLab is publicly available at https://github.com/CMU-SAFARI/SequenceLab. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.16908v4-abstract-full').style.display = 'none'; document.getElementById('2310.16908v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.05037">arXiv:2310.05037</a> <span> [<a href="https://arxiv.org/pdf/2310.05037">pdf</a>, <a href="https://arxiv.org/format/2310.05037">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> RawAlign: Accurate, Fast, and Scalable Raw Nanopore Signal Mapping via Combining Seeding and Alignment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Lindegger%2C+J">Jo毛l Lindegger</a>, <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Ghiasi%2C+N+M">Nika Mansouri Ghiasi</a>, <a href="/search/q-bio?searchtype=author&query=Sadrosadati%2C+M">Mohammad Sadrosadati</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.05037v2-abstract-short" style="display: inline;"> Nanopore sequencers generate raw electrical signals representing the contents of a biological sequence molecule passing through the nanopore. These signals can be analyzed directly, avoiding basecalling entirely. We observe that while existing proposals for raw signal analysis typically do well in all metrics for small genomes (e.g., viral genomes), they all perform poorly for large genomes (e.g.,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.05037v2-abstract-full').style.display = 'inline'; document.getElementById('2310.05037v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.05037v2-abstract-full" style="display: none;"> Nanopore sequencers generate raw electrical signals representing the contents of a biological sequence molecule passing through the nanopore. These signals can be analyzed directly, avoiding basecalling entirely. We observe that while existing proposals for raw signal analysis typically do well in all metrics for small genomes (e.g., viral genomes), they all perform poorly for large genomes (e.g., the human genome). Our goal is to analyze raw nanopore signals in an accurate, fast, and scalable manner. To this end, we propose RawAlign, the first work to integrate fine-grained signal alignment into the state-of-the-art raw signal mapper. To enable accurate, fast, and scalable mapping with alignment, RawAlign implements three algorithmic improvements and hardware acceleration via a vectorized implementation of fine-grained alignment. Together, these significantly reduce the overhead of typically computationally expensive fine-grained alignment. Our extensive evaluations on different use cases and various datasets show RawAlign provides 1) the most accurate mapping for large genomes and 2) and on-par performance compared to RawHash (between 0.80x-1.08x), while achieving better performance than UNCALLED and Sigmap by on average (geo. mean) 2.83x and 2.06x, respectively. Availability: https://github.com/CMU-SAFARI/RawAlign. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.05037v2-abstract-full').style.display = 'none'; document.getElementById('2310.05037v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.17063">arXiv:2309.17063</a> <span> [<a href="https://arxiv.org/pdf/2309.17063">pdf</a>, <a href="https://arxiv.org/format/2309.17063">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> </div> <p class="title is-5 mathjax"> GateSeeder: Near-memory CPU-FPGA Acceleration of Short and Long Read Mapping </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Eudine%2C+J">Julien Eudine</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Singh%2C+G">Gagandeep Singh</a>, <a href="/search/q-bio?searchtype=author&query=Alkan%2C+C">Can Alkan</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.17063v1-abstract-short" style="display: inline;"> Motivation: Read mapping is a computationally expensive process and a major bottleneck in genomics analyses. The performance of read mapping is mainly limited by the performance of three key computational steps: Index Querying, Seed Chaining, and Sequence Alignment. The first step is dominated by how fast and frequent it accesses the main memory (i.e., memory-bound), while the latter two steps are… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.17063v1-abstract-full').style.display = 'inline'; document.getElementById('2309.17063v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.17063v1-abstract-full" style="display: none;"> Motivation: Read mapping is a computationally expensive process and a major bottleneck in genomics analyses. The performance of read mapping is mainly limited by the performance of three key computational steps: Index Querying, Seed Chaining, and Sequence Alignment. The first step is dominated by how fast and frequent it accesses the main memory (i.e., memory-bound), while the latter two steps are dominated by how fast the CPU can compute their computationally-costly dynamic programming algorithms (i.e., compute-bound). Accelerating these three steps by exploiting new algorithms and new hardware devices is essential to accelerate most genome analysis pipelines that widely use read mapping. Given the large body of work on accelerating Sequence Alignment, this work focuses on significantly improving the remaining steps. Results: We introduce GateSeeder, the first CPU-FPGA-based near-memory acceleration of both short and long read mapping. GateSeeder exploits near-memory computation capability provided by modern FPGAs that couple a reconfigurable compute fabric with high-bandwidth memory (HBM) to overcome the memory-bound and compute-bound bottlenecks. GateSeeder also introduces a new lightweight algorithm for finding the potential matching segment pairs. Using real ONT, HiFi, and Illumina sequences, we experimentally demonstrate that GateSeeder outperforms Minimap2, without performing sequence alignment, by up to 40.3x, 4.8x, and 2.3x, respectively. When performing read mapping with sequence alignment, GateSeeder outperforms Minimap2 by 1.15-4.33x (using KSW2) and by 1.97-13.63x (using WFA-GPU). Availability: https://github.com/CMU-SAFARI/GateSeeder <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.17063v1-abstract-full').style.display = 'none'; document.getElementById('2309.17063v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.04953">arXiv:2212.04953</a> <span> [<a href="https://arxiv.org/pdf/2212.04953">pdf</a>, <a href="https://arxiv.org/format/2212.04953">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.3389/fgene.2024.1429306">10.3389/fgene.2024.1429306 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> TargetCall: Eliminating the Wasted Computation in Basecalling via Pre-Basecalling Filtering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Cavlak%2C+M+B">Meryem Banu Cavlak</a>, <a href="/search/q-bio?searchtype=author&query=Singh%2C+G">Gagandeep Singh</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Lindegger%2C+J">Jo毛l Lindegger</a>, <a href="/search/q-bio?searchtype=author&query=Sadrosadati%2C+M">Mohammad Sadrosadati</a>, <a href="/search/q-bio?searchtype=author&query=Ghiasi%2C+N+M">Nika Mansouri Ghiasi</a>, <a href="/search/q-bio?searchtype=author&query=Alkan%2C+C">Can Alkan</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.04953v3-abstract-short" style="display: inline;"> Basecalling is an essential step in nanopore sequencing analysis where the raw signals of nanopore sequencers are converted into nucleotide sequences, i.e., reads. State-of-the-art basecallers employ complex deep learning models to achieve high basecalling accuracy. This makes basecalling computationally inefficient and memory-hungry, bottlenecking the entire genome analysis pipeline. However, for… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.04953v3-abstract-full').style.display = 'inline'; document.getElementById('2212.04953v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.04953v3-abstract-full" style="display: none;"> Basecalling is an essential step in nanopore sequencing analysis where the raw signals of nanopore sequencers are converted into nucleotide sequences, i.e., reads. State-of-the-art basecallers employ complex deep learning models to achieve high basecalling accuracy. This makes basecalling computationally inefficient and memory-hungry, bottlenecking the entire genome analysis pipeline. However, for many applications, the majority of reads do no match the reference genome of interest (i.e., target reference) and thus are discarded in later steps in the genomics pipeline, wasting the basecalling computation. To overcome this issue, we propose TargetCall, the first pre-basecalling filter to eliminate the wasted computation in basecalling. TargetCall's key idea is to discard reads that will not match the target reference (i.e., off-target reads) prior to basecalling. TargetCall consists of two main components: (1) LightCall, a lightweight neural network basecaller that produces noisy reads; and (2) Similarity Check, which labels each of these noisy reads as on-target or off-target by matching them to the target reference. Our thorough experimental evaluations show that TargetCall 1) improves the end-to-end basecalling runtime performance of the state-of-the-art basecaller by 3.31x while maintaining high (98.88%) recall in keeping on-target reads, 2) maintains high accuracy in downstream analysis, and 3) achieves better runtime performance, throughput, recall, precision, and generality compared to prior works. TargetCall is available at https://github.com/CMU-SAFARI/TargetCall. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.04953v3-abstract-full').style.display = 'none'; document.getElementById('2212.04953v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.08157">arXiv:2211.08157</a> <span> [<a href="https://arxiv.org/pdf/2211.08157">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Genome-on-Diet: Taming Large-Scale Genomic Analyses via Sparsified Genomics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Eudine%2C+J">Julien Eudine</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.08157v3-abstract-short" style="display: inline;"> Searching for similar genomic sequences is an essential and fundamental step in biomedical research and an overwhelming majority of genomic analyses. State-of-the-art computational methods performing such comparisons fail to cope with the exponential growth of genomic sequencing data. We introduce the concept of sparsified genomics where we systematically exclude a large number of bases from genom… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.08157v3-abstract-full').style.display = 'inline'; document.getElementById('2211.08157v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.08157v3-abstract-full" style="display: none;"> Searching for similar genomic sequences is an essential and fundamental step in biomedical research and an overwhelming majority of genomic analyses. State-of-the-art computational methods performing such comparisons fail to cope with the exponential growth of genomic sequencing data. We introduce the concept of sparsified genomics where we systematically exclude a large number of bases from genomic sequences and enable much faster and more memory-efficient processing of the sparsified, shorter genomic sequences, while providing similar or even higher accuracy compared to processing non-sparsified sequences. Sparsified genomics provides significant benefits to many genomic analyses and has broad applicability. We show that sparsifying genomic sequences greatly accelerates the state-of-the-art read mapper (minimap2) by 2.57-5.38x, 1.13-2.78x, and 3.52-6.28x using real Illumina, HiFi, and ONT reads, respectively, while providing up to 2.1x smaller memory footprint, 2x smaller index size, and more truly detected small and structural variations compared to minimap2. Sparsifying genomic sequences makes containment search through very large genomes and large databases 72.7-75.88x faster and 723.3x more storage-efficient than searching through non-sparsified genomic sequences (with CMash and KMC3). Sparsifying genomic sequences enables robust microbiome discovery by providing 54.15-61.88x faster and 720x more storage-efficient taxonomic profiling of metagenomic samples over the state-of-the-art tool (Metalign). We design and open-source a framework called Genome-on-Diet as an example tool for sparsified genomics, which can be freely downloaded from https://github.com/CMU-SAFARI/Genome-on-Diet. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.08157v3-abstract-full').style.display = 'none'; document.getElementById('2211.08157v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.03079">arXiv:2211.03079</a> <span> [<a href="https://arxiv.org/pdf/2211.03079">pdf</a>, <a href="https://arxiv.org/format/2211.03079">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> </div> <p class="title is-5 mathjax"> RUBICON: A Framework for Designing Efficient Deep Learning-Based Genomic Basecallers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Singh%2C+G">Gagandeep Singh</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Denolf%2C+K">Kristof Denolf</a>, <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Khodamoradi%2C+A">Alireza Khodamoradi</a>, <a href="/search/q-bio?searchtype=author&query=Cavlak%2C+M+B">Meryem Banu Cavlak</a>, <a href="/search/q-bio?searchtype=author&query=Corporaal%2C+H">Henk Corporaal</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.03079v7-abstract-short" style="display: inline;"> Nanopore sequencing generates noisy electrical signals that need to be converted into a standard string of DNA nucleotide bases using a computational step called basecalling. The accuracy and speed of basecalling have critical implications for all later steps in genome analysis. Many researchers adopt complex deep learning-based models to perform basecalling without considering the compute demands… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.03079v7-abstract-full').style.display = 'inline'; document.getElementById('2211.03079v7-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.03079v7-abstract-full" style="display: none;"> Nanopore sequencing generates noisy electrical signals that need to be converted into a standard string of DNA nucleotide bases using a computational step called basecalling. The accuracy and speed of basecalling have critical implications for all later steps in genome analysis. Many researchers adopt complex deep learning-based models to perform basecalling without considering the compute demands of such models, which leads to slow, inefficient, and memory-hungry basecallers. Therefore, there is a need to reduce the computation and memory cost of basecalling while maintaining accuracy. Our goal is to develop a comprehensive framework for creating deep learning-based basecallers that provide high efficiency and performance. We introduce RUBICON, a framework to develop hardware-optimized basecallers. RUBICON consists of two novel machine-learning techniques that are specifically designed for basecalling. First, we introduce the first quantization-aware basecalling neural architecture search (QABAS) framework to specialize the basecalling neural network architecture for a given hardware acceleration platform while jointly exploring and finding the best bit-width precision for each neural network layer. Second, we develop SkipClip, the first technique to remove the skip connections present in modern basecallers to greatly reduce resource and storage requirements without any loss in basecalling accuracy. We demonstrate the benefits of RUBICON by developing RUBICALL, the first hardware-optimized basecaller that performs fast and accurate basecalling. Compared to the fastest state-of-the-art basecaller, RUBICALL provides a 3.96x speedup with 2.97% higher accuracy. We show that RUBICON helps researchers develop hardware-optimized basecallers that are superior to expert-designed models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.03079v7-abstract-full').style.display = 'none'; document.getElementById('2211.03079v7-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.08600">arXiv:2209.08600</a> <span> [<a href="https://arxiv.org/pdf/2209.08600">pdf</a>, <a href="https://arxiv.org/format/2209.08600">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> </div> <p class="title is-5 mathjax"> GenPIP: In-Memory Acceleration of Genome Analysis via Tight Integration of Basecalling and Read Mapping </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Mao%2C+H">Haiyu Mao</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Sadrosadati%2C+M">Mohammad Sadrosadati</a>, <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Baranwal%2C+A">Akanksha Baranwal</a>, <a href="/search/q-bio?searchtype=author&query=Cali%2C+D+S">Damla Senol Cali</a>, <a href="/search/q-bio?searchtype=author&query=Manglik%2C+A">Aditya Manglik</a>, <a href="/search/q-bio?searchtype=author&query=Alserr%2C+N+A">Nour Almadhoun Alserr</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.08600v2-abstract-short" style="display: inline;"> Nanopore sequencing is a widely-used high-throughput genome sequencing technology that can sequence long fragments of a genome into raw electrical signals at low cost. Nanopore sequencing requires two computationally-costly processing steps for accurate downstream genome analysis. The first step, basecalling, translates the raw electrical signals into nucleotide bases (i.e., A, C, G, T). The secon… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.08600v2-abstract-full').style.display = 'inline'; document.getElementById('2209.08600v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.08600v2-abstract-full" style="display: none;"> Nanopore sequencing is a widely-used high-throughput genome sequencing technology that can sequence long fragments of a genome into raw electrical signals at low cost. Nanopore sequencing requires two computationally-costly processing steps for accurate downstream genome analysis. The first step, basecalling, translates the raw electrical signals into nucleotide bases (i.e., A, C, G, T). The second step, read mapping, finds the correct location of a read in a reference genome. In existing genome analysis pipelines, basecalling and read mapping are executed separately. We observe in this work that such separate execution of the two most time-consuming steps inherently leads to (1) significant data movement and (2) redundant computations on the data, slowing down the genome analysis pipeline. This paper proposes GenPIP, an in-memory genome analysis accelerator that tightly integrates basecalling and read mapping. GenPIP improves the performance of the genome analysis pipeline with two key mechanisms: (1) in-memory fine-grained collaborative execution of the major genome analysis steps in parallel; (2) a new technique for early-rejection of low-quality and unmapped reads to timely stop the execution of genome analysis for such reads, reducing inefficient computation. Our experiments show that, for the execution of the genome analysis pipeline, GenPIP provides 41.6X (8.4X) speedup and 32.8X (20.8X) energy savings with negligible accuracy loss compared to the state-of-the-art software genome analysis tools executed on a state-of-the-art CPU (GPU). Compared to a design that combines state-of-the-art in-memory basecalling and read mapping accelerators, GenPIP provides 1.39X speedup and 1.37X energy savings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.08600v2-abstract-full').style.display = 'none'; document.getElementById('2209.08600v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 13 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.09985">arXiv:2208.09985</a> <span> [<a href="https://arxiv.org/pdf/2208.09985">pdf</a>, <a href="https://arxiv.org/format/2208.09985">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1093/bioinformatics/btad151">10.1093/bioinformatics/btad151 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Scrooge: A Fast and Memory-Frugal Genomic Sequence Aligner for CPUs, GPUs, and ASICs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Lindegger%2C+J">Jo毛l Lindegger</a>, <a href="/search/q-bio?searchtype=author&query=Cali%2C+D+S">Damla Senol Cali</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=G%C3%B3mez-Luna%2C+J">Juan G贸mez-Luna</a>, <a href="/search/q-bio?searchtype=author&query=Ghiasi%2C+N+M">Nika Mansouri Ghiasi</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.09985v3-abstract-short" style="display: inline;"> Pairwise sequence alignment is a very time-consuming step in common bioinformatics pipelines. Speeding up this step requires heuristics, efficient implementations, and/or hardware acceleration. A promising candidate for all of the above is the recently proposed GenASM algorithm. We identify and address three inefficiencies in the GenASM algorithm: it has a high amount of data movement, a large mem… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.09985v3-abstract-full').style.display = 'inline'; document.getElementById('2208.09985v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.09985v3-abstract-full" style="display: none;"> Pairwise sequence alignment is a very time-consuming step in common bioinformatics pipelines. Speeding up this step requires heuristics, efficient implementations, and/or hardware acceleration. A promising candidate for all of the above is the recently proposed GenASM algorithm. We identify and address three inefficiencies in the GenASM algorithm: it has a high amount of data movement, a large memory footprint, and does some unnecessary work. We propose Scrooge, a fast and memory-frugal genomic sequence aligner. Scrooge includes three novel algorithmic improvements which reduce the data movement, memory footprint, and the number of operations in the GenASM algorithm. We provide efficient open-source implementations of the Scrooge algorithm for CPUs and GPUs, which demonstrate the significant benefits of our algorithmic improvements. For long reads, the CPU version of Scrooge achieves a 20.1x, 1.7x, and 2.1x speedup over KSW2, Edlib, and a CPU implementation of GenASM, respectively. The GPU version of Scrooge achieves a 4.0x 80.4x, 6.8x, 12.6x and 5.9x speedup over the CPU version of Scrooge, KSW2, Edlib, Darwin-GPU, and a GPU implementation of GenASM, respectively. We estimate an ASIC implementation of Scrooge to use 3.6x less chip area and 2.1x less power than a GenASM ASIC while maintaining the same throughput. Further, we systematically analyze the throughput and accuracy behavior of GenASM and Scrooge under various configurations. As the best configuration of Scrooge depends on the computing platform, we make several observations that can help guide future implementations of Scrooge. Availability: https://github.com/CMU-SAFARI/Scrooge <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.09985v3-abstract-full').style.display = 'none'; document.getElementById('2208.09985v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.09765">arXiv:2207.09765</a> <span> [<a href="https://arxiv.org/pdf/2207.09765">pdf</a>, <a href="https://arxiv.org/format/2207.09765">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> ApHMM: Accelerating Profile Hidden Markov Models for Fast and Energy-Efficient Genome Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Pillai%2C+K">Kamlesh Pillai</a>, <a href="/search/q-bio?searchtype=author&query=Kalsi%2C+G+S">Gurpreet S. Kalsi</a>, <a href="/search/q-bio?searchtype=author&query=Suresh%2C+B">Bharathwaj Suresh</a>, <a href="/search/q-bio?searchtype=author&query=Cali%2C+D+S">Damla Senol Cali</a>, <a href="/search/q-bio?searchtype=author&query=Kim%2C+J">Jeremie Kim</a>, <a href="/search/q-bio?searchtype=author&query=Shahroodi%2C+T">Taha Shahroodi</a>, <a href="/search/q-bio?searchtype=author&query=Cavlak%2C+M+B">Meryem Banu Cavlak</a>, <a href="/search/q-bio?searchtype=author&query=Lindegger%2C+J">Joel Lindegger</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Luna%2C+J+G">Juan G贸mez Luna</a>, <a href="/search/q-bio?searchtype=author&query=Subramoney%2C+S">Sreenivas Subramoney</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.09765v2-abstract-short" style="display: inline;"> Profile hidden Markov models (pHMMs) are widely employed in various bioinformatics applications to identify similarities between biological sequences, such as DNA or protein sequences. In pHMMs, sequences are represented as graph structures. These probabilities are subsequently used to compute the similarity score between a sequence and a pHMM graph. The Baum-Welch algorithm, a prevalent and highl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.09765v2-abstract-full').style.display = 'inline'; document.getElementById('2207.09765v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.09765v2-abstract-full" style="display: none;"> Profile hidden Markov models (pHMMs) are widely employed in various bioinformatics applications to identify similarities between biological sequences, such as DNA or protein sequences. In pHMMs, sequences are represented as graph structures. These probabilities are subsequently used to compute the similarity score between a sequence and a pHMM graph. The Baum-Welch algorithm, a prevalent and highly accurate method, utilizes these probabilities to optimize and compute similarity scores. However, the Baum-Welch algorithm is computationally intensive, and existing solutions offer either software-only or hardware-only approaches with fixed pHMM designs. We identify an urgent need for a flexible, high-performance, and energy-efficient HW/SW co-design to address the major inefficiencies in the Baum-Welch algorithm for pHMMs. We introduce ApHMM, the first flexible acceleration framework designed to significantly reduce both computational and energy overheads associated with the Baum-Welch algorithm for pHMMs. ApHMM tackles the major inefficiencies in the Baum-Welch algorithm by 1) designing flexible hardware to accommodate various pHMM designs, 2) exploiting predictable data dependency patterns through on-chip memory with memoization techniques, 3) rapidly filtering out negligible computations using a hardware-based filter, and 4) minimizing redundant computations. ApHMM achieves substantial speedups of 15.55x - 260.03x, 1.83x - 5.34x, and 27.97x when compared to CPU, GPU, and FPGA implementations of the Baum-Welch algorithm, respectively. ApHMM outperforms state-of-the-art CPU implementations in three key bioinformatics applications: 1) error correction, 2) protein family search, and 3) multiple sequence alignment, by 1.29x - 59.94x, 1.03x - 1.75x, and 1.03x - 1.95x, respectively, while improving their energy efficiency by 64.24x - 115.46x, 1.75x, 1.96x. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.09765v2-abstract-full').style.display = 'none'; document.getElementById('2207.09765v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ACM TACO</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.06692">arXiv:2206.06692</a> <span> [<a href="https://arxiv.org/pdf/2206.06692">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Physics and Society">physics.soc-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> </div> </div> <p class="title is-5 mathjax"> COVIDHunter: COVID-19 pandemic wave prediction and mitigation via seasonality-aware modeling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Kim%2C+J+S">Jeremie S. Kim</a>, <a href="/search/q-bio?searchtype=author&query=Alserr%2C+N+A">Nour Almadhoun Alserr</a>, <a href="/search/q-bio?searchtype=author&query=Tell%2C+S+W">Stefan W. Tell</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.06692v1-abstract-short" style="display: inline;"> Early detection and isolation of COVID-19 patients are essential for successful implementation of mitigation strategies and eventually curbing the disease spread. With a limited number of daily COVID-19 tests performed in every country, simulating the COVID-19 spread along with the potential effect of each mitigation strategy currently remains one of the most effective ways in managing the healthc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.06692v1-abstract-full').style.display = 'inline'; document.getElementById('2206.06692v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.06692v1-abstract-full" style="display: none;"> Early detection and isolation of COVID-19 patients are essential for successful implementation of mitigation strategies and eventually curbing the disease spread. With a limited number of daily COVID-19 tests performed in every country, simulating the COVID-19 spread along with the potential effect of each mitigation strategy currently remains one of the most effective ways in managing the healthcare system and guiding policy-makers. We introduce COVIDHunter, a flexible and accurate COVID-19 outbreak simulation model that evaluates the current mitigation measures that are applied to a region, predicts COVID-19 statistics (the daily number of cases, hospitalizations, and deaths), and provides suggestions on what strength the upcoming mitigation measure should be. The key idea of COVIDHunter is to quantify the spread of COVID-19 in a geographical region by simulating the average number of new infections caused by an infected person considering the effect of external factors, such as environmental conditions (e.g., climate, temperature, humidity), different variants of concern, vaccination rate, and mitigation measures. Using Switzerland as a case study, COVIDHunter estimates that we are experiencing a deadly new wave that will peak on 26 January 2022, which is very similar in numbers to the wave we had in February 2020. The policy-makers have only one choice that is to increase the strength of the currently applied mitigation measures for 30 days. Unlike existing models, the COVIDHunter model accurately monitors and predicts the daily number of cases, hospitalizations, and deaths due to COVID-19. Our model is flexible to configure and simple to modify for modeling different scenarios under different environmental conditions and mitigation measures. We release the source code of the COVIDHunter implementation at https://github.com/CMU-SAFARI/COVIDHunter. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.06692v1-abstract-full').style.display = 'none'; document.getElementById('2206.06692v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: substantial text overlap with arXiv:2102.03667</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.01932">arXiv:2206.01932</a> <span> [<a href="https://arxiv.org/pdf/2206.01932">pdf</a>, <a href="https://arxiv.org/format/2206.01932">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> </div> <p class="title is-5 mathjax"> Demeter: A Fast and Energy-Efficient Food Profiler using Hyperdimensional Computing in Memory </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Shahroodi%2C+T">Taha Shahroodi</a>, <a href="/search/q-bio?searchtype=author&query=Zahedi%2C+M">Mahdi Zahedi</a>, <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Wong%2C+S">Stephan Wong</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a>, <a href="/search/q-bio?searchtype=author&query=Hamdioui%2C+S">Said Hamdioui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.01932v2-abstract-short" style="display: inline;"> Food profiling is an essential step in any food monitoring system needed to prevent health risks and potential frauds in the food industry. Significant improvements in sequencing technologies are pushing food profiling to become the main computational bottleneck. State-of-the-art profilers are unfortunately too costly for food profiling. Our goal is to design a food profiler that solves the main… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.01932v2-abstract-full').style.display = 'inline'; document.getElementById('2206.01932v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.01932v2-abstract-full" style="display: none;"> Food profiling is an essential step in any food monitoring system needed to prevent health risks and potential frauds in the food industry. Significant improvements in sequencing technologies are pushing food profiling to become the main computational bottleneck. State-of-the-art profilers are unfortunately too costly for food profiling. Our goal is to design a food profiler that solves the main limitations of existing profilers, namely (1) working on massive data structures and (2) incurring considerable data movement for a real-time monitoring system. To this end, we propose Demeter, the first platform-independent framework for food profiling. Demeter overcomes the first limitation through the use of hyperdimensional computing (HDC) and efficiently performs the accurate few-species classification required in food profiling. We overcome the second limitation by using an in-memory hardware accelerator for Demeter (named Acc-Demeter) based on memristor devices. Acc-Demeter actualizes several domain-specific optimizations and exploits the inherent characteristics of memristors to improve the overall performance and energy consumption of Acc-Demeter. We compare Demeter's accuracy with other industrial food profilers using detailed software modeling. We synthesize Acc-Demeter's required hardware using UMC's 65nm library by considering an accurate PCM model based on silicon-based prototypes. Our evaluations demonstrate that Acc-Demeter achieves a (1) throughput improvement of 192x and 724x and (2) memory reduction of 36x and 33x compared to Kraken2 and MetaCache (2 state-of-the-art profilers), respectively, on typical food-related databases. Demeter maintains an acceptable profiling accuracy (within 2% of existing tools) and incurs a very low area overhead. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.01932v2-abstract-full').style.display = 'none'; document.getElementById('2206.01932v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.07957">arXiv:2205.07957</a> <span> [<a href="https://arxiv.org/pdf/2205.07957">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Going From Molecules to Genomic Variations to Scientific Discovery: Intelligent Algorithms and Architectures for Intelligent Genome Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Lindegger%2C+J">Joel Lindegger</a>, <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Almadhoun%2C+N">Nour Almadhoun</a>, <a href="/search/q-bio?searchtype=author&query=Mao%2C+H">Haiyu Mao</a>, <a href="/search/q-bio?searchtype=author&query=Singh%2C+G">Gagandeep Singh</a>, <a href="/search/q-bio?searchtype=author&query=Gomez-Luna%2C+J">Juan Gomez-Luna</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.07957v1-abstract-short" style="display: inline;"> We now need more than ever to make genome analysis more intelligent. We need to read, analyze, and interpret our genomes not only quickly, but also accurately and efficiently enough to scale the analysis to population level. There currently exist major computational bottlenecks and inefficiencies throughout the entire genome analysis pipeline, because state-of-the-art genome sequencing technologie… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.07957v1-abstract-full').style.display = 'inline'; document.getElementById('2205.07957v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.07957v1-abstract-full" style="display: none;"> We now need more than ever to make genome analysis more intelligent. We need to read, analyze, and interpret our genomes not only quickly, but also accurately and efficiently enough to scale the analysis to population level. There currently exist major computational bottlenecks and inefficiencies throughout the entire genome analysis pipeline, because state-of-the-art genome sequencing technologies are still not able to read a genome in its entirety. We describe the ongoing journey in significantly improving the performance, accuracy, and efficiency of genome analysis using intelligent algorithms and hardware architectures. We explain state-of-the-art algorithmic methods and hardware-based acceleration approaches for each step of the genome analysis pipeline and provide experimental evaluations. Algorithmic approaches exploit the structure of the genome as well as the structure of the underlying hardware. Hardware-based acceleration approaches exploit specialized microarchitectures or various execution paradigms (e.g., processing inside or near memory) along with algorithmic changes, leading to new hardware/software co-designed systems. We conclude with a foreshadowing of future challenges, benefits, and research directions triggered by the development of both very low cost yet highly error prone new sequencing technologies and specialized hardware chips for genomics. We hope that these efforts and the challenges we discuss provide a foundation for future work in making genome analysis more intelligent. The analysis script and data used in our experimental evaluation are available at: https://github.com/CMU-SAFARI/Molecules2Variations <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.07957v1-abstract-full').style.display = 'none'; document.getElementById('2205.07957v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: text overlap with arXiv:2008.00961</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.05883">arXiv:2205.05883</a> <span> [<a href="https://arxiv.org/pdf/2205.05883">pdf</a>, <a href="https://arxiv.org/format/2205.05883">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3470496.3527436">10.1145/3470496.3527436 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> SeGraM: A Universal Hardware Accelerator for Genomic Sequence-to-Graph and Sequence-to-Sequence Mapping </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Cali%2C+D+S">Damla Senol Cali</a>, <a href="/search/q-bio?searchtype=author&query=Kanellopoulos%2C+K">Konstantinos Kanellopoulos</a>, <a href="/search/q-bio?searchtype=author&query=Lindegger%2C+J">Joel Lindegger</a>, <a href="/search/q-bio?searchtype=author&query=Bing%C3%B6l%2C+Z">Z眉lal Bing枚l</a>, <a href="/search/q-bio?searchtype=author&query=Kalsi%2C+G+S">Gurpreet S. Kalsi</a>, <a href="/search/q-bio?searchtype=author&query=Zuo%2C+Z">Ziyi Zuo</a>, <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Cavlak%2C+M+B">Meryem Banu Cavlak</a>, <a href="/search/q-bio?searchtype=author&query=Kim%2C+J">Jeremie Kim</a>, <a href="/search/q-bio?searchtype=author&query=Ghiasi%2C+N+M">Nika Mansouri Ghiasi</a>, <a href="/search/q-bio?searchtype=author&query=Singh%2C+G">Gagandeep Singh</a>, <a href="/search/q-bio?searchtype=author&query=G%C3%B3mez-Luna%2C+J">Juan G贸mez-Luna</a>, <a href="/search/q-bio?searchtype=author&query=Alserr%2C+N+A">Nour Almadhoun Alserr</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Subramoney%2C+S">Sreenivas Subramoney</a>, <a href="/search/q-bio?searchtype=author&query=Alkan%2C+C">Can Alkan</a>, <a href="/search/q-bio?searchtype=author&query=Ghose%2C+S">Saugata Ghose</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.05883v2-abstract-short" style="display: inline;"> A critical step of genome sequence analysis is the mapping of sequenced DNA fragments (i.e., reads) collected from an individual to a known linear reference genome sequence (i.e., sequence-to-sequence mapping). Recent works replace the linear reference sequence with a graph-based representation of the reference genome, which captures the genetic variations and diversity across many individuals in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.05883v2-abstract-full').style.display = 'inline'; document.getElementById('2205.05883v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.05883v2-abstract-full" style="display: none;"> A critical step of genome sequence analysis is the mapping of sequenced DNA fragments (i.e., reads) collected from an individual to a known linear reference genome sequence (i.e., sequence-to-sequence mapping). Recent works replace the linear reference sequence with a graph-based representation of the reference genome, which captures the genetic variations and diversity across many individuals in a population. Mapping reads to the graph-based reference genome (i.e., sequence-to-graph mapping) results in notable quality improvements in genome analysis. Unfortunately, while sequence-to-sequence mapping is well studied with many available tools and accelerators, sequence-to-graph mapping is a more difficult computational problem, with a much smaller number of practical software tools currently available. We analyze two state-of-the-art sequence-to-graph mapping tools and reveal four key issues. We find that there is a pressing need to have a specialized, high-performance, scalable, and low-cost algorithm/hardware co-design that alleviates bottlenecks in both the seeding and alignment steps of sequence-to-graph mapping. To this end, we propose SeGraM, a universal algorithm/hardware co-designed genomic mapping accelerator that can effectively and efficiently support both sequence-to-graph mapping and sequence-to-sequence mapping, for both short and long reads. To our knowledge, SeGraM is the first algorithm/hardware co-design for accelerating sequence-to-graph mapping. SeGraM consists of two main components: (1) MinSeed, the first minimizer-based seeding accelerator; and (2) BitAlign, the first bitvector-based sequence-to-graph alignment accelerator. We demonstrate that SeGraM provides significant improvements for multiple steps of the sequence-to-graph and sequence-to-sequence mapping pipelines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.05883v2-abstract-full').style.display = 'none'; document.getElementById('2205.05883v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in ISCA'22</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.16261">arXiv:2203.16261</a> <span> [<a href="https://arxiv.org/pdf/2203.16261">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> </div> </div> <p class="title is-5 mathjax"> Packaging, containerization, and virtualization of computational omics methods: Advances, challenges, and opportunities </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Waymost%2C+S">Sharon Waymost</a>, <a href="/search/q-bio?searchtype=author&query=Ayyala%2C+R">Ram Ayyala</a>, <a href="/search/q-bio?searchtype=author&query=Lawlor%2C+B">Brendan Lawlor</a>, <a href="/search/q-bio?searchtype=author&query=Abdill%2C+R+J">Richard J. Abdill</a>, <a href="/search/q-bio?searchtype=author&query=Rajkumar%2C+N">Neha Rajkumar</a>, <a href="/search/q-bio?searchtype=author&query=LaPierre%2C+N">Nathan LaPierre</a>, <a href="/search/q-bio?searchtype=author&query=Brito%2C+J">Jaqueline Brito</a>, <a href="/search/q-bio?searchtype=author&query=Ribeiro-dos-Santos%2C+A+M">Andre M. Ribeiro-dos-Santos</a>, <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Almadhoun%2C+N">Nour Almadhoun</a>, <a href="/search/q-bio?searchtype=author&query=Sarwal%2C+V">Varuni Sarwal</a>, <a href="/search/q-bio?searchtype=author&query=Eskin%2C+E">Eleazar Eskin</a>, <a href="/search/q-bio?searchtype=author&query=Hu%2C+Q">Qiyang Hu</a>, <a href="/search/q-bio?searchtype=author&query=Strong%2C+D">Derek Strong</a>, <a href="/search/q-bio?searchtype=author&query=Byoung-Do"> Byoung-Do</a>, <a href="/search/q-bio?searchtype=author&query=Kim"> Kim</a>, <a href="/search/q-bio?searchtype=author&query=Abedalthagafi%2C+M+S">Malak S. Abedalthagafi</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a>, <a href="/search/q-bio?searchtype=author&query=Mangul%2C+S">Serghei Mangul</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.16261v1-abstract-short" style="display: inline;"> Omics software tools have reshaped the landscape of modern biology and become an essential component of biomedical research. The increasing dependence of biomedical scientists on these powerful tools creates a need for easier installation and greater usability. Packaging, virtualization, and containerization are different approaches to satisfy this need by wrapping omics tools in additional softwa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.16261v1-abstract-full').style.display = 'inline'; document.getElementById('2203.16261v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.16261v1-abstract-full" style="display: none;"> Omics software tools have reshaped the landscape of modern biology and become an essential component of biomedical research. The increasing dependence of biomedical scientists on these powerful tools creates a need for easier installation and greater usability. Packaging, virtualization, and containerization are different approaches to satisfy this need by wrapping omics tools in additional software that makes the omics tools easier to install and use. Here, we systematically review practices across prominent packaging, virtualization, and containerization platforms. We outline the challenges, advantages, and limitations of each approach and some of the most widely used platforms from the perspectives of users, software developers, and system administrators. We also propose principles to make packaging, virtualization, and containerization of omics software more sustainable and robust to increase the reproducibility of biomedical and life science research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.16261v1-abstract-full').style.display = 'none'; document.getElementById('2203.16261v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.10400">arXiv:2202.10400</a> <span> [<a href="https://arxiv.org/pdf/2202.10400">pdf</a>, <a href="https://arxiv.org/format/2202.10400">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Operating Systems">cs.OS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> </div> <p class="title is-5 mathjax"> GenStore: A High-Performance and Energy-Efficient In-Storage Computing System for Genome Sequence Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Ghiasi%2C+N+M">Nika Mansouri Ghiasi</a>, <a href="/search/q-bio?searchtype=author&query=Park%2C+J">Jisung Park</a>, <a href="/search/q-bio?searchtype=author&query=Mustafa%2C+H">Harun Mustafa</a>, <a href="/search/q-bio?searchtype=author&query=Kim%2C+J">Jeremie Kim</a>, <a href="/search/q-bio?searchtype=author&query=Olgun%2C+A">Ataberk Olgun</a>, <a href="/search/q-bio?searchtype=author&query=Gollwitzer%2C+A">Arvid Gollwitzer</a>, <a href="/search/q-bio?searchtype=author&query=Cali%2C+D+S">Damla Senol Cali</a>, <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Mao%2C+H">Haiyu Mao</a>, <a href="/search/q-bio?searchtype=author&query=Alserr%2C+N+A">Nour Almadhoun Alserr</a>, <a href="/search/q-bio?searchtype=author&query=Ausavarungnirun%2C+R">Rachata Ausavarungnirun</a>, <a href="/search/q-bio?searchtype=author&query=Vijaykumar%2C+N">Nandita Vijaykumar</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.10400v2-abstract-short" style="display: inline;"> Read mapping is a fundamental, yet computationally-expensive step in many genomics applications. It is used to identify potential matches and differences between fragments (called reads) of a sequenced genome and an already known genome (called a reference genome). To address the computational challenges in genome analysis, many prior works propose various approaches such as filters that select th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.10400v2-abstract-full').style.display = 'inline'; document.getElementById('2202.10400v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.10400v2-abstract-full" style="display: none;"> Read mapping is a fundamental, yet computationally-expensive step in many genomics applications. It is used to identify potential matches and differences between fragments (called reads) of a sequenced genome and an already known genome (called a reference genome). To address the computational challenges in genome analysis, many prior works propose various approaches such as filters that select the reads that must undergo expensive computation, efficient heuristics, and hardware acceleration. While effective at reducing the computation overhead, all such approaches still require the costly movement of a large amount of data from storage to the rest of the system, which can significantly lower the end-to-end performance of read mapping in conventional and emerging genomics systems. We propose GenStore, the first in-storage processing system designed for genome sequence analysis that greatly reduces both data movement and computational overheads of genome sequence analysis by exploiting low-cost and accurate in-storage filters. GenStore leverages hardware/software co-design to address the challenges of in-storage processing, supporting reads with 1) different read lengths and error rates, and 2) different degrees of genetic variation. Through rigorous analysis of read mapping processes, we meticulously design low-cost hardware accelerators and data/computation flows inside a NAND flash-based SSD. Our evaluation using a wide range of real genomic datasets shows that GenStore, when implemented in three modern SSDs, significantly improves the read mapping performance of state-of-the-art software (hardware) baselines by 2.07-6.05$\times$ (1.52-3.32$\times$) for read sets with high similarity to the reference genome and 1.45-33.63$\times$ (2.70-19.2$\times$) for read sets with low similarity to the reference genome. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.10400v2-abstract-full').style.display = 'none'; document.getElementById('2202.10400v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published at ASPLOS 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.08687">arXiv:2112.08687</a> <span> [<a href="https://arxiv.org/pdf/2112.08687">pdf</a>, <a href="https://arxiv.org/format/2112.08687">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1093/nargab/lqad004">10.1093/nargab/lqad004 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> BLEND: A Fast, Memory-Efficient, and Accurate Mechanism to Find Fuzzy Seed Matches in Genome Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Park%2C+J">Jisung Park</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Kim%2C+J+S">Jeremie S. Kim</a>, <a href="/search/q-bio?searchtype=author&query=Cali%2C+D+S">Damla Senol Cali</a>, <a href="/search/q-bio?searchtype=author&query=Shahroodi%2C+T">Taha Shahroodi</a>, <a href="/search/q-bio?searchtype=author&query=Ghiasi%2C+N+M">Nika Mansouri Ghiasi</a>, <a href="/search/q-bio?searchtype=author&query=Singh%2C+G">Gagandeep Singh</a>, <a href="/search/q-bio?searchtype=author&query=Kanellopoulos%2C+K">Konstantinos Kanellopoulos</a>, <a href="/search/q-bio?searchtype=author&query=Alkan%2C+C">Can Alkan</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.08687v7-abstract-short" style="display: inline;"> Generating the hash values of short subsequences, called seeds, enables quickly identifying similarities between genomic sequences by matching seeds with a single lookup of their hash values. However, these hash values can be used only for finding exact-matching seeds as the conventional hashing methods assign distinct hash values for different seeds, including highly similar seeds. Finding only e… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.08687v7-abstract-full').style.display = 'inline'; document.getElementById('2112.08687v7-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.08687v7-abstract-full" style="display: none;"> Generating the hash values of short subsequences, called seeds, enables quickly identifying similarities between genomic sequences by matching seeds with a single lookup of their hash values. However, these hash values can be used only for finding exact-matching seeds as the conventional hashing methods assign distinct hash values for different seeds, including highly similar seeds. Finding only exact-matching seeds causes either 1) increasing the use of the costly sequence alignment or 2) limited sensitivity. We introduce BLEND, the first efficient and accurate mechanism that can identify both exact-matching and highly similar seeds with a single lookup of their hash values, called fuzzy seed matches. BLEND 1) utilizes a technique called SimHash, that can generate the same hash value for similar sets, and 2) provides the proper mechanisms for using seeds as sets with the SimHash technique to find fuzzy seed matches efficiently. We show the benefits of BLEND when used in read overlapping and read mapping. For read overlapping, BLEND is faster by 2.4x - 83.9x (on average 19.3x), has a lower memory footprint by 0.9x - 14.1x (on average 3.8x), and finds higher quality overlaps leading to accurate de novo assemblies than the state-of-the-art tool, minimap2. For read mapping, BLEND is faster by 0.8x - 4.1x (on average 1.7x) than minimap2. Source code is available at https://github.com/CMU-SAFARI/BLEND. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.08687v7-abstract-full').style.display = 'none'; document.getElementById('2112.08687v7-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in NARGAB</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> NAR Genomics and Bioinformatics, vol. 5, no. 1, p. lqad004, Mar. 2023 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.14978">arXiv:2103.14978</a> <span> [<a href="https://arxiv.org/pdf/2103.14978">pdf</a>, <a href="https://arxiv.org/format/2103.14978">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TC.2024.3365931">10.1109/TC.2024.3365931 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> GateKeeper-GPU: Fast and Accurate Pre-Alignment Filtering in Short Read Mapping </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Bing%C3%B6l%2C+Z">Z眉lal Bing枚l</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a>, <a href="/search/q-bio?searchtype=author&query=Ozturk%2C+O">Ozcan Ozturk</a>, <a href="/search/q-bio?searchtype=author&query=Alkan%2C+C">Can Alkan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.14978v3-abstract-short" style="display: inline;"> At the last step of short read mapping, the candidate locations of the reads on the reference genome are verified to compute their differences from the corresponding reference segments using sequence alignment algorithms. Calculating the similarities and differences between two sequences is still computationally expensive since approximate string matching techniques traditionally inherit dynamic p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.14978v3-abstract-full').style.display = 'inline'; document.getElementById('2103.14978v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.14978v3-abstract-full" style="display: none;"> At the last step of short read mapping, the candidate locations of the reads on the reference genome are verified to compute their differences from the corresponding reference segments using sequence alignment algorithms. Calculating the similarities and differences between two sequences is still computationally expensive since approximate string matching techniques traditionally inherit dynamic programming algorithms with quadratic time and space complexity. We introduce GateKeeper-GPU, a fast and accurate pre-alignment filter that efficiently reduces the need for expensive sequence alignment. GateKeeper-GPU provides two main contributions: first, improving the filtering accuracy of GateKeeper (a lightweight pre-alignment filter), and second, exploiting the massive parallelism provided by the large number of GPU threads of modern GPUs to examine numerous sequence pairs rapidly and concurrently. By reducing the work, GateKeeper-GPU provides an acceleration of 2.9x to sequence alignment and up to 1.4x speedup to the end-to-end execution time of a comprehensive read mapper (mrFAST). GateKeeper-GPU is available at https://github.com/BilkentCompGen/GateKeeper-GPU. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.14978v3-abstract-full').style.display = 'none'; document.getElementById('2103.14978v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">26 pages</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Computers, 73 (5): 1206-1218, 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2102.03667">arXiv:2102.03667</a> <span> [<a href="https://arxiv.org/pdf/2102.03667">pdf</a>, <a href="https://arxiv.org/format/2102.03667">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Other Statistics">stat.OT</span> </div> </div> <p class="title is-5 mathjax"> COVIDHunter: An Accurate, Flexible, and Environment-Aware Open-Source COVID-19 Outbreak Simulation Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Kim%2C+J+S">Jeremie S. Kim</a>, <a href="/search/q-bio?searchtype=author&query=Alserr%2C+N+A">Nour Almadhoun Alserr</a>, <a href="/search/q-bio?searchtype=author&query=Tell%2C+S+W">Stefan W. Tell</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2102.03667v2-abstract-short" style="display: inline;"> Background: Early detection and isolation of COVID-19 patients are essential for successful implementation of mitigation strategies and eventually curbing the disease spread. With a limited number of daily COVID-19 tests performed in every country, simulating the COVID-19 spread along with the potential effect of each mitigation strategy currently remains one of the most effective ways in managing… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.03667v2-abstract-full').style.display = 'inline'; document.getElementById('2102.03667v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2102.03667v2-abstract-full" style="display: none;"> Background: Early detection and isolation of COVID-19 patients are essential for successful implementation of mitigation strategies and eventually curbing the disease spread. With a limited number of daily COVID-19 tests performed in every country, simulating the COVID-19 spread along with the potential effect of each mitigation strategy currently remains one of the most effective ways in managing the healthcare system and guiding policy-makers. Methods: We introduce COVIDHunter, a flexible and accurate COVID-19 outbreak simulation model that evaluates the current mitigation measures that are applied to a region and provides suggestions on what strength the upcoming mitigation measure should be. The key idea of COVIDHunter is to quantify the spread of COVID-19 in a geographical region by simulating the average number of new infections caused by an infected person considering the effect of external factors, such as environmental conditions (e.g., climate, temperature, humidity) and mitigation measures. Results: Using Switzerland as a case study, COVIDHunter estimates that if the policy-makers relax the mitigation measures by 50% for 30 days then both the daily capacity need for hospital beds and daily number of deaths increase exponentially by an average of 5.1x, who may occupy ICU beds and ventilators for a period of time. Unlike existing models, the COVIDHunter model accurately monitors and predicts the daily number of cases, hospitalizations, and deaths due to COVID-19. Our model is flexible to configure and simple to modify for modeling different scenarios under different environmental conditions and mitigation measures. Availability: We release the source code of the COVIDHunter implementation at https://github.com/CMU- SAFARI/COVIDHunter and show how to flexibly configure our model for any scenario and easily extend it for different measures and conditions than we account for. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.03667v2-abstract-full').style.display = 'none'; document.getElementById('2102.03667v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 February, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2009.07692">arXiv:2009.07692</a> <span> [<a href="https://arxiv.org/pdf/2009.07692">pdf</a>, <a href="https://arxiv.org/format/2009.07692">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> </div> <p class="title is-5 mathjax"> GenASM: A High-Performance, Low-Power Approximate String Matching Acceleration Framework for Genome Sequence Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Cali%2C+D+S">Damla Senol Cali</a>, <a href="/search/q-bio?searchtype=author&query=Kalsi%2C+G+S">Gurpreet S. Kalsi</a>, <a href="/search/q-bio?searchtype=author&query=Bing%C3%B6l%2C+Z">Z眉lal Bing枚l</a>, <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Subramanian%2C+L">Lavanya Subramanian</a>, <a href="/search/q-bio?searchtype=author&query=Kim%2C+J+S">Jeremie S. Kim</a>, <a href="/search/q-bio?searchtype=author&query=Ausavarungnirun%2C+R">Rachata Ausavarungnirun</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Gomez-Luna%2C+J">Juan Gomez-Luna</a>, <a href="/search/q-bio?searchtype=author&query=Boroumand%2C+A">Amirali Boroumand</a>, <a href="/search/q-bio?searchtype=author&query=Nori%2C+A">Anant Nori</a>, <a href="/search/q-bio?searchtype=author&query=Scibisz%2C+A">Allison Scibisz</a>, <a href="/search/q-bio?searchtype=author&query=Subramoney%2C+S">Sreenivas Subramoney</a>, <a href="/search/q-bio?searchtype=author&query=Alkan%2C+C">Can Alkan</a>, <a href="/search/q-bio?searchtype=author&query=Ghose%2C+S">Saugata Ghose</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2009.07692v1-abstract-short" style="display: inline;"> Genome sequence analysis has enabled significant advancements in medical and scientific areas such as personalized medicine, outbreak tracing, and the understanding of evolution. Unfortunately, it is currently bottlenecked by the computational power and memory bandwidth limitations of existing systems, as many of the steps in genome sequence analysis must process a large amount of data. A major co… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.07692v1-abstract-full').style.display = 'inline'; document.getElementById('2009.07692v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2009.07692v1-abstract-full" style="display: none;"> Genome sequence analysis has enabled significant advancements in medical and scientific areas such as personalized medicine, outbreak tracing, and the understanding of evolution. Unfortunately, it is currently bottlenecked by the computational power and memory bandwidth limitations of existing systems, as many of the steps in genome sequence analysis must process a large amount of data. A major contributor to this bottleneck is approximate string matching (ASM). We propose GenASM, the first ASM acceleration framework for genome sequence analysis. We modify the underlying ASM algorithm (Bitap) to significantly increase its parallelism and reduce its memory footprint, and we design the first hardware accelerator for Bitap. Our hardware accelerator consists of specialized compute units and on-chip SRAMs that are designed to match the rate of computation with memory capacity and bandwidth. We demonstrate that GenASM is a flexible, high-performance, and low-power framework, which provides significant performance and power benefits for three different use cases in genome sequence analysis: 1) GenASM accelerates read alignment for both long reads and short reads. For long reads, GenASM outperforms state-of-the-art software and hardware accelerators by 116x and 3.9x, respectively, while consuming 37x and 2.7x less power. For short reads, GenASM outperforms state-of-the-art software and hardware accelerators by 111x and 1.9x. 2) GenASM accelerates pre-alignment filtering for short reads, with 3.7x the performance of a state-of-the-art pre-alignment filter, while consuming 1.7x less power and significantly improving the filtering accuracy. 3) GenASM accelerates edit distance calculation, with 22-12501x and 9.3-400x speedups over the state-of-the-art software library and FPGA-based accelerator, respectively, while consuming 548-582x and 67x less power. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.07692v1-abstract-full').style.display = 'none'; document.getElementById('2009.07692v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 September, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in MICRO 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2008.00961">arXiv:2008.00961</a> <span> [<a href="https://arxiv.org/pdf/2008.00961">pdf</a>, <a href="https://arxiv.org/format/2008.00961">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation">stat.CO</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/MM.2020.3013728">10.1109/MM.2020.3013728 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Accelerating Genome Analysis: A Primer on an Ongoing Journey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Bing%C3%B6l%2C+Z">Z眉lal Bing枚l</a>, <a href="/search/q-bio?searchtype=author&query=Cali%2C+D+S">Damla Senol Cali</a>, <a href="/search/q-bio?searchtype=author&query=Kim%2C+J">Jeremie Kim</a>, <a href="/search/q-bio?searchtype=author&query=Ghose%2C+S">Saugata Ghose</a>, <a href="/search/q-bio?searchtype=author&query=Alkan%2C+C">Can Alkan</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2008.00961v2-abstract-short" style="display: inline;"> Genome analysis fundamentally starts with a process known as read mapping, where sequenced fragments of an organism's genome are compared against a reference genome. Read mapping is currently a major bottleneck in the entire genome analysis pipeline, because state-of-the-art genome sequencing technologies are able to sequence a genome much faster than the computational techniques employed to analy… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2008.00961v2-abstract-full').style.display = 'inline'; document.getElementById('2008.00961v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2008.00961v2-abstract-full" style="display: none;"> Genome analysis fundamentally starts with a process known as read mapping, where sequenced fragments of an organism's genome are compared against a reference genome. Read mapping is currently a major bottleneck in the entire genome analysis pipeline, because state-of-the-art genome sequencing technologies are able to sequence a genome much faster than the computational techniques employed to analyze the genome. We describe the ongoing journey in significantly improving the performance of read mapping. We explain state-of-the-art algorithmic methods and hardware-based acceleration approaches. Algorithmic approaches exploit the structure of the genome as well as the structure of the underlying hardware. Hardware-based acceleration approaches exploit specialized microarchitectures or various execution paradigms (e.g., processing inside or near memory). We conclude with the challenges of adopting these hardware-accelerated read mappers. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2008.00961v2-abstract-full').style.display = 'none'; document.getElementById('2008.00961v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 September, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 July, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This is an extended and updated version of a paper published in IEEE Micro, vol. 40, no. 5, pp. 65-75, 1 Sept.-Oct. 2020, https://doi.org/10.1109/MM.2020.3013728</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Micro, Volume: 40, Issue: 5, Sept.-Oct. 1 2020 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2003.00110">arXiv:2003.00110</a> <span> [<a href="https://arxiv.org/pdf/2003.00110">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1186/s13059-021-02443-7">10.1186/s13059-021-02443-7 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Technology dictates algorithms: Recent developments in read alignment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Rotman%2C+J">Jeremy Rotman</a>, <a href="/search/q-bio?searchtype=author&query=Taraszka%2C+K">Kodi Taraszka</a>, <a href="/search/q-bio?searchtype=author&query=Shi%2C+H">Huwenbo Shi</a>, <a href="/search/q-bio?searchtype=author&query=Baykal%2C+P+I">Pelin Icer Baykal</a>, <a href="/search/q-bio?searchtype=author&query=Yang%2C+H+T">Harry Taegyun Yang</a>, <a href="/search/q-bio?searchtype=author&query=Xue%2C+V">Victor Xue</a>, <a href="/search/q-bio?searchtype=author&query=Knyazev%2C+S">Sergey Knyazev</a>, <a href="/search/q-bio?searchtype=author&query=Singer%2C+B+D">Benjamin D. Singer</a>, <a href="/search/q-bio?searchtype=author&query=Balliu%2C+B">Brunilda Balliu</a>, <a href="/search/q-bio?searchtype=author&query=Koslicki%2C+D">David Koslicki</a>, <a href="/search/q-bio?searchtype=author&query=Skums%2C+P">Pavel Skums</a>, <a href="/search/q-bio?searchtype=author&query=Zelikovsky%2C+A">Alex Zelikovsky</a>, <a href="/search/q-bio?searchtype=author&query=Alkan%2C+C">Can Alkan</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a>, <a href="/search/q-bio?searchtype=author&query=Mangul%2C+S">Serghei Mangul</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2003.00110v3-abstract-short" style="display: inline;"> Massively parallel sequencing techniques have revolutionized biological and medical sciences by providing unprecedented insight into the genomes of humans, animals, and microbes. Modern sequencing platforms generate enormous amounts of genomic data in the form of nucleotide sequences or reads. Aligning reads onto reference genomes enables the identification of individual-specific genetic variants… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2003.00110v3-abstract-full').style.display = 'inline'; document.getElementById('2003.00110v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2003.00110v3-abstract-full" style="display: none;"> Massively parallel sequencing techniques have revolutionized biological and medical sciences by providing unprecedented insight into the genomes of humans, animals, and microbes. Modern sequencing platforms generate enormous amounts of genomic data in the form of nucleotide sequences or reads. Aligning reads onto reference genomes enables the identification of individual-specific genetic variants and is an essential step of the majority of genomic analysis pipelines. Aligned reads are essential for answering important biological questions, such as detecting mutations driving various human diseases and complex traits as well as identifying species present in metagenomic samples. The read alignment problem is extremely challenging due to the large size of analyzed datasets and numerous technological limitations of sequencing platforms, and researchers have developed novel bioinformatics algorithms to tackle these difficulties. Importantly, computational algorithms have evolved and diversified in accordance with technological advances, leading to todays diverse array of bioinformatics tools. Our review provides a survey of algorithmic foundations and methodologies across 107 alignment methods published between 1988 and 2020, for both short and long reads. We provide rigorous experimental evaluation of 11 read aligners to demonstrate the effect of these underlying algorithms on speed and efficiency of read aligners. We separately discuss how longer read lengths produce unique advantages and limitations to read alignment techniques. We also discuss how general alignment algorithms have been tailored to the specific needs of various domains in biology, including whole transcriptome, adaptive immune repertoire, and human microbiome studies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2003.00110v3-abstract-full').style.display = 'none'; document.getElementById('2003.00110v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 July, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 February, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Genome Biol . Aug 26;22(1):249, 2021 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1912.08735">arXiv:1912.08735</a> <span> [<a href="https://arxiv.org/pdf/1912.08735">pdf</a>, <a href="https://arxiv.org/format/1912.08735">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> </div> </div> <p class="title is-5 mathjax"> AirLift: A Fast and Comprehensive Technique for Remapping Alignments between Reference Genomes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Kim%2C+J+S">Jeremie S. Kim</a>, <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Cavlak%2C+M+B">Meryem Banu Cavlak</a>, <a href="/search/q-bio?searchtype=author&query=Cali%2C+D+S">Damla Senol Cali</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Hajinazar%2C+N">Nastaran Hajinazar</a>, <a href="/search/q-bio?searchtype=author&query=Alkan%2C+C">Can Alkan</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1912.08735v5-abstract-short" style="display: inline;"> AirLift is the first read remapping tool that enables users to quickly and comprehensively map a read set, that had been previously mapped to one reference genome, to another similar reference. Users can then quickly run a downstream analysis of read sets for each latest reference release. Compared to the state-of-the-art method for remapping reads (i.e., full mapping), AirLift reduces the overall… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1912.08735v5-abstract-full').style.display = 'inline'; document.getElementById('1912.08735v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1912.08735v5-abstract-full" style="display: none;"> AirLift is the first read remapping tool that enables users to quickly and comprehensively map a read set, that had been previously mapped to one reference genome, to another similar reference. Users can then quickly run a downstream analysis of read sets for each latest reference release. Compared to the state-of-the-art method for remapping reads (i.e., full mapping), AirLift reduces the overall execution time to remap read sets between two reference genome versions by up to 27.4x. We validate our remapping results with GATK and find that AirLift provides high accuracy in identifying ground truth SNP/INDEL variants AirLift source code and readme describing how to reproduce our results are available at https://github.com/CMU-SAFARI/AirLift. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1912.08735v5-abstract-full').style.display = 'none'; document.getElementById('1912.08735v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 December, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in the IEEE/ACM TCBB journal: https://ieeexplore.ieee.org/document/10638724</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1910.09020">arXiv:1910.09020</a> <span> [<a href="https://arxiv.org/pdf/1910.09020">pdf</a>, <a href="https://arxiv.org/format/1910.09020">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1093/bioinformatics/btaa1015">10.1093/bioinformatics/btaa1015 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> SneakySnake: A Fast and Accurate Universal Genome Pre-Alignment Filter for CPUs, GPUs, and FPGAs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Shahroodi%2C+T">Taha Shahroodi</a>, <a href="/search/q-bio?searchtype=author&query=Gomez-Luna%2C+J">Juan Gomez-Luna</a>, <a href="/search/q-bio?searchtype=author&query=Alkan%2C+C">Can Alkan</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1910.09020v3-abstract-short" style="display: inline;"> Motivation: We introduce SneakySnake, a highly parallel and highly accurate pre-alignment filter that remarkably reduces the need for computationally costly sequence alignment. The key idea of SneakySnake is to reduce the approximate string matching (ASM) problem to the single net routing (SNR) problem in VLSI chip layout. In the SNR problem, we are interested in finding the optimal path that conn… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.09020v3-abstract-full').style.display = 'inline'; document.getElementById('1910.09020v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1910.09020v3-abstract-full" style="display: none;"> Motivation: We introduce SneakySnake, a highly parallel and highly accurate pre-alignment filter that remarkably reduces the need for computationally costly sequence alignment. The key idea of SneakySnake is to reduce the approximate string matching (ASM) problem to the single net routing (SNR) problem in VLSI chip layout. In the SNR problem, we are interested in finding the optimal path that connects two terminals with the least routing cost on a special grid layout that contains obstacles. The SneakySnake algorithm quickly solves the SNR problem and uses the found optimal path to decide whether or not performing sequence alignment is necessary. Reducing the ASM problem into SNR also makes SneakySnake efficient to implement on CPUs, GPUs, and FPGAs. Results: SneakySnake significantly improves the accuracy of pre-alignment filtering by up to four orders of magnitude compared to the state-of-the-art pre-alignment filters, Shouji, GateKeeper, and SHD. For short sequences, SneakySnake accelerates Edlib (state-of-the-art implementation of Myers's bit-vector algorithm) and Parasail (state-of-the-art sequence aligner with a configurable scoring function), by up to 37.7x and 43.9x (>12x on average), respectively, with its CPU implementation, and by up to 413x and 689x (>400x on average), respectively, with FPGA and GPU acceleration. For long sequences, the CPU implementation of SneakySnake accelerates Parasail and KSW2 (sequence aligner of minimap2) by up to 979x (276.9x on average) and 91.7x (31.7x on average), respectively. As SneakySnake does not replace sequence alignment, users can still obtain all capabilities (e.g., configurable scoring functions) of the aligner of their choice, unlike existing acceleration efforts that sacrifice some aligner capabilities. Availability: https://github.com/CMU-SAFARI/SneakySnake <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.09020v3-abstract-full').style.display = 'none'; document.getElementById('1910.09020v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 November, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 October, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in Bioinformatics</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Bioinformatics, Apr 1;36(22-23):5282-5290, 2021 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1910.03936">arXiv:1910.03936</a> <span> [<a href="https://arxiv.org/pdf/1910.03936">pdf</a>, <a href="https://arxiv.org/format/1910.03936">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> </div> </div> <p class="title is-5 mathjax"> Accelerating the Understanding of Life's Code Through Better Algorithms and Hardware Design </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Alser%2C+M+H+K">Mohammed H K Alser</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1910.03936v1-abstract-short" style="display: inline;"> Calculating the similarities between a pair of genomic sequences is one of the most fundamental computational steps in genomic analysis. This step -- called sequence alignment -- is the computational bottleneck because: (1) it is implemented using quadratic-time dynamic programming algorithms and (2) the majority of candidate locations in the reference genome do not align with a given read due to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.03936v1-abstract-full').style.display = 'inline'; document.getElementById('1910.03936v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1910.03936v1-abstract-full" style="display: none;"> Calculating the similarities between a pair of genomic sequences is one of the most fundamental computational steps in genomic analysis. This step -- called sequence alignment -- is the computational bottleneck because: (1) it is implemented using quadratic-time dynamic programming algorithms and (2) the majority of candidate locations in the reference genome do not align with a given read due to high dissimilarity. Calculating the alignment of such incorrect candidate locations consumes an overwhelming majority of a modern read mapper's execution time. In this thesis, we introduce four new algorithms (GateKeeper, Shouji, MAGNET, and SneakySnake) that function as a pre-alignment step and aim to filter out most incorrect candidate locations. The first key idea of our pre-alignment filters is to provide high filtering accuracy by correctly detecting all similar segments shared between two sequences. The second key idea is to exploit the massively parallel architecture of modern FPGAs for accelerating our filtering algorithms. We also develop an efficient CPU implementation of the SneakySnake algorithm for commodity desktops and servers. We evaluate the benefits and downsides of our pre-alignment filtering approach in detail using 12 real datasets. In our evaluation, we demonstrate that our hardware pre-alignment filters show two to three orders of magnitude speedup over their equivalent CPU implementations. We also demonstrate that integrating our hardware pre-alignment filters with the state-of-the-art read aligners reduces the aligner's execution time by up to 21.5x. Finally, we show that efficient CPU implementation of pre-alignment filtering still provides significant benefits. We show that SneakySnake on average reduces the execution time of the best performing CPU-based read aligners Edlib and Parasail, by up to 43x and 57.9x, respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.03936v1-abstract-full').style.display = 'none'; document.getElementById('1910.03936v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1902.04341">arXiv:1902.04341</a> <span> [<a href="https://arxiv.org/pdf/1902.04341">pdf</a>, <a href="https://arxiv.org/format/1902.04341">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1093/bioinformatics/btaa179">10.1093/bioinformatics/btaa179 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Apollo: A Sequencing-Technology-Independent, Scalable, and Accurate Assembly Polishing Algorithm </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Kim%2C+J+S">Jeremie S. Kim</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Cali%2C+D+S">Damla Senol Cali</a>, <a href="/search/q-bio?searchtype=author&query=Cicek%2C+A+E">A. Ercument Cicek</a>, <a href="/search/q-bio?searchtype=author&query=Alkan%2C+C">Can Alkan</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1902.04341v2-abstract-short" style="display: inline;"> Long reads produced by third-generation sequencing technologies are used to construct an assembly (i.e., the subject's genome), which is further used in downstream genome analysis. Unfortunately, long reads have high sequencing error rates and a large proportion of bps in these long reads are incorrectly identified. These errors propagate to the assembly and affect the accuracy of genome analysis.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1902.04341v2-abstract-full').style.display = 'inline'; document.getElementById('1902.04341v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1902.04341v2-abstract-full" style="display: none;"> Long reads produced by third-generation sequencing technologies are used to construct an assembly (i.e., the subject's genome), which is further used in downstream genome analysis. Unfortunately, long reads have high sequencing error rates and a large proportion of bps in these long reads are incorrectly identified. These errors propagate to the assembly and affect the accuracy of genome analysis. Assembly polishing algorithms minimize such error propagation by polishing or fixing errors in the assembly by using information from alignments between reads and the assembly (i.e., read-to-assembly alignment information). However, assembly polishing algorithms can only polish an assembly using reads either from a certain sequencing technology or from a small assembly. Such technology-dependency and assembly-size dependency require researchers to 1) run multiple polishing algorithms and 2) use small chunks of a large genome to use all available read sets and polish large genomes. We introduce Apollo, a universal assembly polishing algorithm that scales well to polish an assembly of any size (i.e., both large and small genomes) using reads from all sequencing technologies (i.e., second- and third-generation). Our goal is to provide a single algorithm that uses read sets from all available sequencing technologies to improve the accuracy of assembly polishing and that can polish large genomes. Apollo 1) models an assembly as a profile hidden Markov model (pHMM), 2) uses read-to-assembly alignment to train the pHMM with the Forward-Backward algorithm, and 3) decodes the trained model with the Viterbi algorithm to produce a polished assembly. Our experiments with real read sets demonstrate that Apollo is the only algorithm that 1) uses reads from any sequencing technology within a single run and 2) scales well to polish large assemblies without splitting the assembly into multiple parts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1902.04341v2-abstract-full').style.display = 'none'; document.getElementById('1902.04341v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 March, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 February, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 1 figure. Accepted in Bioinformatics</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Bioinformatics . 2020 Jun 1;36(12):3669-3679 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1809.07858">arXiv:1809.07858</a> <span> [<a href="https://arxiv.org/pdf/1809.07858">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1093/bioinformatics/btz234">10.1093/bioinformatics/btz234 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Shouji: A Fast and Efficient Pre-Alignment Filter for Sequence Alignment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Hassan%2C+H">Hasan Hassan</a>, <a href="/search/q-bio?searchtype=author&query=Kumar%2C+A">Akash Kumar</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a>, <a href="/search/q-bio?searchtype=author&query=Alkan%2C+C">Can Alkan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1809.07858v4-abstract-short" style="display: inline;"> Motivation: The ability to generate massive amounts of sequencing data continues to overwhelm the processing capability of existing algorithms and compute infrastructures. In this work, we explore the use of hardware/software co-design and hardware acceleration to significantly reduce the execution time of short sequence alignment, a crucial step in analyzing sequenced genomes. We introduce Shouji… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1809.07858v4-abstract-full').style.display = 'inline'; document.getElementById('1809.07858v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1809.07858v4-abstract-full" style="display: none;"> Motivation: The ability to generate massive amounts of sequencing data continues to overwhelm the processing capability of existing algorithms and compute infrastructures. In this work, we explore the use of hardware/software co-design and hardware acceleration to significantly reduce the execution time of short sequence alignment, a crucial step in analyzing sequenced genomes. We introduce Shouji, a highly-parallel and accurate pre-alignment filter that remarkably reduces the need for computationally-costly dynamic programming algorithms. The first key idea of our proposed pre-alignment filter is to provide high filtering accuracy by correctly detecting all common subsequences shared between two given sequences. The second key idea is to design a hardware accelerator that adopts modern FPGA (Field-Programmable Gate Array) architectures to further boost the performance of our algorithm. Results: Shouji significantly improves the accuracy of pre-alignment filtering by up to two orders of magnitude compared to the state-of-the-art pre-alignment filters, GateKeeper and SHD. Our FPGA-based accelerator is up to three orders of magnitude faster than the equivalent CPU implementation of Shouji. Using a single FPGA chip, we benchmark the benefits of integrating Shouji with five state-of-the-art sequence aligners, designed for different computing platforms. The addition of Shouji as a pre-alignment step reduces the execution time of the five state-of-the-art sequence aligners by up to 18.8x. Shouji can be adapted for any bioinformatics pipeline that performs sequence alignment for verification. Unlike most existing methods that aim to accelerate sequence alignment, Shouji does not sacrifice any of the aligner capabilities, as it does not modify or replace the alignment step. Availability: https://github.com/CMU-SAFARI/Shouji <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1809.07858v4-abstract-full').style.display = 'none'; document.getElementById('1809.07858v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 April, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 September, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">https://academic.oup.com/bioinformatics/advance-article-abstract/doi/10.1093/bioinformatics/btz234/5421509, Bioinformatics Journal 2019</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Bioinformatics, Nov 1; 35 (21): 4255 - 4263, 2019 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1711.01177">arXiv:1711.01177</a> <span> [<a href="https://arxiv.org/pdf/1711.01177">pdf</a>, <a href="https://arxiv.org/format/1711.01177">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1186/s12864-018-4460-0">10.1186/s12864-018-4460-0 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> GRIM-Filter: Fast Seed Location Filtering in DNA Read Mapping Using Processing-in-Memory Technologies </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Kim%2C+J+S">Jeremie S. Kim</a>, <a href="/search/q-bio?searchtype=author&query=Cali%2C+D+S">Damla Senol Cali</a>, <a href="/search/q-bio?searchtype=author&query=Xin%2C+H">Hongyi Xin</a>, <a href="/search/q-bio?searchtype=author&query=Lee%2C+D">Donghyuk Lee</a>, <a href="/search/q-bio?searchtype=author&query=Ghose%2C+S">Saugata Ghose</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Hassan%2C+H">Hasan Hassan</a>, <a href="/search/q-bio?searchtype=author&query=Ergin%2C+O">Oguz Ergin</a>, <a href="/search/q-bio?searchtype=author&query=Alkan%2C+C">Can Alkan</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1711.01177v1-abstract-short" style="display: inline;"> Motivation: Seed location filtering is critical in DNA read mapping, a process where billions of DNA fragments (reads) sampled from a donor are mapped onto a reference genome to identify genomic variants of the donor. State-of-the-art read mappers 1) quickly generate possible mapping locations for seeds (i.e., smaller segments) within each read, 2) extract reference sequences at each of the mappin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1711.01177v1-abstract-full').style.display = 'inline'; document.getElementById('1711.01177v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1711.01177v1-abstract-full" style="display: none;"> Motivation: Seed location filtering is critical in DNA read mapping, a process where billions of DNA fragments (reads) sampled from a donor are mapped onto a reference genome to identify genomic variants of the donor. State-of-the-art read mappers 1) quickly generate possible mapping locations for seeds (i.e., smaller segments) within each read, 2) extract reference sequences at each of the mapping locations, and 3) check similarity between each read and its associated reference sequences with a computationally-expensive algorithm (i.e., sequence alignment) to determine the origin of the read. A seed location filter comes into play before alignment, discarding seed locations that alignment would deem a poor match. The ideal seed location filter would discard all poor match locations prior to alignment such that there is no wasted computation on unnecessary alignments. Results: We propose a novel seed location filtering algorithm, GRIM-Filter, optimized to exploit 3D-stacked memory systems that integrate computation within a logic layer stacked under memory layers, to perform processing-in-memory (PIM). GRIM-Filter quickly filters seed locations by 1) introducing a new representation of coarse-grained segments of the reference genome, and 2) using massively-parallel in-memory operations to identify read presence within each coarse-grained segment. Our evaluations show that for a sequence alignment error tolerance of 0.05, GRIM-Filter 1) reduces the false negative rate of filtering by 5.59x--6.41x, and 2) provides an end-to-end read mapper speedup of 1.81x--3.65x, compared to a state-of-the-art read mapper employing the best previous seed location filtering algorithm. Availability: The code is available online at: https://github.com/CMU-SAFARI/GRIM <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1711.01177v1-abstract-full').style.display = 'none'; document.getElementById('1711.01177v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 November, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2017. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: text overlap with arXiv:1708.04329</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> BMC Genomics, 19 (Suppl 2):89, 2018 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1708.04329">arXiv:1708.04329</a> <span> [<a href="https://arxiv.org/pdf/1708.04329">pdf</a>, <a href="https://arxiv.org/format/1708.04329">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> </div> <p class="title is-5 mathjax"> GRIM-filter: fast seed filtering in read mapping using emerging memory technologies </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Kim%2C+J+S">Jeremie S Kim</a>, <a href="/search/q-bio?searchtype=author&query=Senol%2C+D">Damla Senol</a>, <a href="/search/q-bio?searchtype=author&query=Xin%2C+H">Hongyi Xin</a>, <a href="/search/q-bio?searchtype=author&query=Lee%2C+D">Donghyuk Lee</a>, <a href="/search/q-bio?searchtype=author&query=Ghose%2C+S">Saugata Ghose</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Hassan%2C+H">Hasan Hassan</a>, <a href="/search/q-bio?searchtype=author&query=Ergin%2C+O">Oguz Ergin</a>, <a href="/search/q-bio?searchtype=author&query=Alkan%2C+C">Can Alkan</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1708.04329v1-abstract-short" style="display: inline;"> Motivation: Seed filtering is critical in DNA read mapping, a process where billions of DNA fragments (reads) sampled from a donor are mapped onto a reference genome to identify genomic variants of the donor. Read mappers 1) quickly generate possible mapping locations (i.e., seeds) for each read, 2) extract reference sequences at each of the mapping locations, and then 3) check similarity between… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1708.04329v1-abstract-full').style.display = 'inline'; document.getElementById('1708.04329v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1708.04329v1-abstract-full" style="display: none;"> Motivation: Seed filtering is critical in DNA read mapping, a process where billions of DNA fragments (reads) sampled from a donor are mapped onto a reference genome to identify genomic variants of the donor. Read mappers 1) quickly generate possible mapping locations (i.e., seeds) for each read, 2) extract reference sequences at each of the mapping locations, and then 3) check similarity between each read and its associated reference sequences with a computationally expensive dynamic programming algorithm (alignment) to determine the origin of the read. Location filters come into play before alignment, discarding seed locations that alignment would have deemed a poor match. The ideal location filter would discard all poor matching locations prior to alignment such that there is no wasted computation on poor alignments. Results: We propose a novel filtering algorithm, GRIM-Filter, optimized to exploit emerging 3D-stacked memory systems that integrate computation within a stacked logic layer, enabling processing-in-memory (PIM). GRIM-Filter quickly filters locations by 1) introducing a new representation of coarse-grained segments of the reference genome and 2) using massively-parallel in-memory operations to identify read presence within each coarse-grained segment. Our evaluations show that for 5% error acceptance rates, GRIM-Filter eliminates 5.59x-6.41x more false negatives and exhibits end-to-end speedups of 1.81x-3.65x compared to mappers employing the best previous filtering algorithm. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1708.04329v1-abstract-full').style.display = 'none'; document.getElementById('1708.04329v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 August, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2017. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1707.01631">arXiv:1707.01631</a> <span> [<a href="https://arxiv.org/pdf/1707.01631">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> </div> <p class="title is-5 mathjax"> MAGNET: Understanding and Improving the Accuracy of Genome Pre-Alignment Filtering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a>, <a href="/search/q-bio?searchtype=author&query=Alkan%2C+C">Can Alkan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1707.01631v2-abstract-short" style="display: inline;"> In the era of high throughput DNA sequencing (HTS) technologies, calculating the edit distance (i.e., the minimum number of substitutions, insertions, and deletions between a pair of sequences) for billions of genomic sequences is the computational bottleneck in todays read mappers. The shifted Hamming distance (SHD) algorithm proposes a fast filtering strategy that can rapidly filter out invalid… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1707.01631v2-abstract-full').style.display = 'inline'; document.getElementById('1707.01631v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1707.01631v2-abstract-full" style="display: none;"> In the era of high throughput DNA sequencing (HTS) technologies, calculating the edit distance (i.e., the minimum number of substitutions, insertions, and deletions between a pair of sequences) for billions of genomic sequences is the computational bottleneck in todays read mappers. The shifted Hamming distance (SHD) algorithm proposes a fast filtering strategy that can rapidly filter out invalid mappings that have more edits than allowed. However, SHD shows high inaccuracy in its filtering by admitting invalid mappings to be marked as correct ones. This wastes the execution time and imposes a large computational burden. In this work, we comprehensively investigate four sources that lead to the filtering inaccuracy. We propose MAGNET, a new filtering strategy that maintains high accuracy across different edit distance thresholds and data sets. It significantly improves the accuracy of pre-alignment filtering by one to two orders of magnitude. The MATLAB implementations of MAGNET and SHD are open source and available at: https://github.com/BilkentCompGen/MAGNET. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1707.01631v2-abstract-full').style.display = 'none'; document.getElementById('1707.01631v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 July, 2017; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 July, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2017. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 Pages, 13 Figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IPSI Transactions on Internet Research, 13(2), 2017 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1604.01789">arXiv:1604.01789</a> <span> [<a href="https://arxiv.org/pdf/1604.01789">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1093/bioinformatics/btx342">10.1093/bioinformatics/btx342 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> GateKeeper: A New Hardware Architecture for Accelerating Pre-Alignment in DNA Short Read Mapping </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Hassan%2C+H">Hasan Hassan</a>, <a href="/search/q-bio?searchtype=author&query=Xin%2C+H">Hongyi Xin</a>, <a href="/search/q-bio?searchtype=author&query=Ergin%2C+O">O臒uz Ergin</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a>, <a href="/search/q-bio?searchtype=author&query=Alkan%2C+C">Can Alkan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1604.01789v3-abstract-short" style="display: inline;"> Motivation: High throughput DNA sequencing (HTS) technologies generate an excessive number of small DNA segments -- called short reads -- that cause significant computational burden. To analyze the entire genome, each of the billions of short reads must be mapped to a reference genome based on the similarity between a read and "candidate" locations in that reference genome. The similarity measurem… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1604.01789v3-abstract-full').style.display = 'inline'; document.getElementById('1604.01789v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1604.01789v3-abstract-full" style="display: none;"> Motivation: High throughput DNA sequencing (HTS) technologies generate an excessive number of small DNA segments -- called short reads -- that cause significant computational burden. To analyze the entire genome, each of the billions of short reads must be mapped to a reference genome based on the similarity between a read and "candidate" locations in that reference genome. The similarity measurement, called alignment, formulated as an approximate string matching problem, is the computational bottleneck because: (1) it is implemented using quadratic-time dynamic programming algorithms, and (2) the majority of candidate locations in the reference genome do not align with a given read due to high dissimilarity. Calculating the alignment of such incorrect candidate locations consumes an overwhelming majority of a modern read mapper's execution time. Therefore, it is crucial to develop a fast and effective filter that can detect incorrect candidate locations and eliminate them before invoking computationally costly alignment operations. Results: We propose GateKeeper, a new hardware accelerator that functions as a pre-alignment step that quickly filters out most incorrect candidate locations. GateKeeper is the first design to accelerate pre-alignment using Field-Programmable Gate Arrays (FPGAs), which can perform pre-alignment much faster than software. GateKeeper can be integrated with any mapper that performs sequence alignment for verification. When implemented on a single FPGA chip, GateKeeper maintains high accuracy (on average >96%) while providing up to 90-fold and 130-fold speedup over the state-of-the-art software pre-alignment techniques, Adjacency Filter and Shifted Hamming Distance (SHD), respectively. The addition of GateKeeper as a pre-alignment step can reduce the verification time of the mrFAST mapper by a factor of 10. Availability: https://github.com/BilkentCompGen/GateKeeper <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1604.01789v3-abstract-full').style.display = 'none'; document.getElementById('1604.01789v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 September, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 April, 2016; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2016. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Bioinformatics. Nov 1;33(21):3355-3363, 2017 </p> </li> </ol> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>