Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script 
src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." 
aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–9 of 9 results for author: <span class="mathjax">Cavlak, M B</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/q-bio" aria-role="search"> Searching in archive <strong>q-bio</strong>. 
<a href="/search/?searchtype=author&query=Cavlak%2C+M+B">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Cavlak, M B"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Cavlak%2C+M+B&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level 
breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Cavlak, M B"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.19113">arXiv:2406.19113</a> <span> [<a href="https://arxiv.org/pdf/2406.19113">pdf</a>, <a href="https://arxiv.org/format/2406.19113">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> </div> <p class="title is-5 mathjax"> MegIS: High-Performance, Energy-Efficient, and Low-Cost Metagenomic Analysis with In-Storage Processing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Ghiasi%2C+N+M">Nika Mansouri Ghiasi</a>, <a href="/search/q-bio?searchtype=author&query=Sadrosadati%2C+M">Mohammad Sadrosadati</a>, <a href="/search/q-bio?searchtype=author&query=Mustafa%2C+H">Harun Mustafa</a>, <a href="/search/q-bio?searchtype=author&query=Gollwitzer%2C+A">Arvid Gollwitzer</a>, <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a 
href="/search/q-bio?searchtype=author&query=Eudine%2C+J">Julien Eudine</a>, <a href="/search/q-bio?searchtype=author&query=Mao%2C+H">Haiyu Mao</a>, <a href="/search/q-bio?searchtype=author&query=Lindegger%2C+J">Joël Lindegger</a>, <a href="/search/q-bio?searchtype=author&query=Cavlak%2C+M+B">Meryem Banu Cavlak</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Park%2C+J">Jisung Park</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.19113v1-abstract-short" style="display: inline;"> Metagenomics has led to significant advances in many fields. Metagenomic analysis commonly involves the key tasks of determining the species present in a sample and their relative abundances. These tasks require searching large metagenomic databases. Metagenomic analysis suffers from significant data movement overhead due to moving large amounts of low-reuse data from the storage system. In-storag… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19113v1-abstract-full').style.display = 'inline'; document.getElementById('2406.19113v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.19113v1-abstract-full" style="display: none;"> Metagenomics has led to significant advances in many fields. Metagenomic analysis commonly involves the key tasks of determining the species present in a sample and their relative abundances. These tasks require searching large metagenomic databases. Metagenomic analysis suffers from significant data movement overhead due to moving large amounts of low-reuse data from the storage system. In-storage processing can be a fundamental solution for reducing this overhead. 
However, designing an in-storage processing system for metagenomics is challenging because existing approaches to metagenomic analysis cannot be directly implemented in storage effectively due to the hardware limitations of modern SSDs. We propose MegIS, the first in-storage processing system designed to significantly reduce the data movement overhead of the end-to-end metagenomic analysis pipeline. MegIS is enabled by our lightweight design that effectively leverages and orchestrates processing inside and outside the storage system. We address in-storage processing challenges for metagenomics via specialized and efficient 1) task partitioning, 2) data/computation flow coordination, 3) storage technology-aware algorithmic optimizations, 4) data mapping, and 5) lightweight in-storage accelerators. MegIS's design is flexible, capable of supporting different types of metagenomic input datasets, and can be integrated into various metagenomic analysis pipelines. Our evaluation shows that MegIS outperforms the state-of-the-art performance- and accuracy-optimized software metagenomic tools by 2.7$\times$-37.2$\times$ and 6.9$\times$-100.2$\times$, respectively, while matching the accuracy of the accuracy-optimized tool. MegIS achieves 1.5$\times$-5.1$\times$ speedup compared to the state-of-the-art metagenomic hardware-accelerated (using processing-in-memory) tool, while achieving significantly higher accuracy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19113v1-abstract-full').style.display = 'none'; document.getElementById('2406.19113v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in ISCA 2024. arXiv admin note: substantial text overlap with arXiv:2311.12527</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.12527">arXiv:2311.12527</a> <span> [<a href="https://arxiv.org/pdf/2311.12527">pdf</a>, <a href="https://arxiv.org/format/2311.12527">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> MetaStore: High-Performance Metagenomic Analysis via In-Storage Computing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Ghiasi%2C+N+M">Nika Mansouri Ghiasi</a>, <a href="/search/q-bio?searchtype=author&query=Sadrosadati%2C+M">Mohammad Sadrosadati</a>, <a href="/search/q-bio?searchtype=author&query=Mustafa%2C+H">Harun Mustafa</a>, <a href="/search/q-bio?searchtype=author&query=Gollwitzer%2C+A">Arvid Gollwitzer</a>, <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Eudine%2C+J">Julien Eudine</a>, <a href="/search/q-bio?searchtype=author&query=Ma%2C+H">Haiyu Ma</a>, <a href="/search/q-bio?searchtype=author&query=Lindegger%2C+J">Joël Lindegger</a>, <a href="/search/q-bio?searchtype=author&query=Cavlak%2C+M+B">Meryem Banu Cavlak</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Park%2C+J">Jisung Park</a>, <a 
href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.12527v1-abstract-short" style="display: inline;"> Metagenomics has led to significant advancements in many fields. Metagenomic analysis commonly involves the key tasks of determining the species present in a sample and their relative abundances. These tasks require searching large metagenomic databases containing information on different species' genomes. Metagenomic analysis suffers from significant data movement overhead due to moving large amo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.12527v1-abstract-full').style.display = 'inline'; document.getElementById('2311.12527v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.12527v1-abstract-full" style="display: none;"> Metagenomics has led to significant advancements in many fields. Metagenomic analysis commonly involves the key tasks of determining the species present in a sample and their relative abundances. These tasks require searching large metagenomic databases containing information on different species' genomes. Metagenomic analysis suffers from significant data movement overhead due to moving large amounts of low-reuse data from the storage system to the rest of the system. In-storage processing can be a fundamental solution for reducing data movement overhead. However, designing an in-storage processing system for metagenomics is challenging because none of the existing approaches can be directly implemented in storage effectively due to the hardware limitations of modern SSDs. We propose MetaStore, the first in-storage processing system designed to significantly reduce the data movement overhead of end-to-end metagenomic analysis. 
MetaStore is enabled by our lightweight and cooperative design that effectively leverages and orchestrates processing inside and outside the storage system. Through our detailed analysis of the end-to-end metagenomic analysis pipeline and careful hardware/software co-design, we address in-storage processing challenges for metagenomics via specialized and efficient 1) task partitioning, 2) data/computation flow coordination, 3) storage technology-aware algorithmic optimizations, 4) light-weight in-storage accelerators, and 5) data mapping. Our evaluation shows that MetaStore outperforms the state-of-the-art performance- and accuracy-optimized software metagenomic tools by 2.7-37.2$\times$ and 6.9-100.2$\times$, respectively, while matching the accuracy of the accuracy-optimized tool. MetaStore achieves 1.5-5.1$\times$ speedup compared to the state-of-the-art metagenomic hardware-accelerated tool, while achieving significantly higher accuracy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.12527v1-abstract-full').style.display = 'none'; document.getElementById('2311.12527v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2301.09200">arXiv:2301.09200</a> <span> [<a href="https://arxiv.org/pdf/2301.09200">pdf</a>, <a href="https://arxiv.org/format/2301.09200">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1093/bioinformatics/btad272">10.1093/bioinformatics/btad272 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> RawHash: Enabling Fast and Accurate Real-Time Analysis of Raw Nanopore Signals for Large Genomes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Ghiasi%2C+N+M">Nika Mansouri Ghiasi</a>, <a href="/search/q-bio?searchtype=author&query=Lindegger%2C+J">Joel Lindegger</a>, <a href="/search/q-bio?searchtype=author&query=Singh%2C+G">Gagandeep Singh</a>, <a href="/search/q-bio?searchtype=author&query=Cavlak%2C+M+B">Meryem Banu Cavlak</a>, <a href="/search/q-bio?searchtype=author&query=Mao%2C+H">Haiyu Mao</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2301.09200v4-abstract-short" style="display: inline;"> Nanopore sequencers generate electrical raw signals in real-time while sequencing long genomic strands. These raw signals can be analyzed as they are generated, providing an opportunity for real-time genome analysis. 
An important feature of nanopore sequencing, Read Until, can eject strands from sequencers without fully sequencing them, which provides opportunities to computationally reduce the se… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.09200v4-abstract-full').style.display = 'inline'; document.getElementById('2301.09200v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2301.09200v4-abstract-full" style="display: none;"> Nanopore sequencers generate electrical raw signals in real-time while sequencing long genomic strands. These raw signals can be analyzed as they are generated, providing an opportunity for real-time genome analysis. An important feature of nanopore sequencing, Read Until, can eject strands from sequencers without fully sequencing them, which provides opportunities to computationally reduce the sequencing time and cost. However, existing works utilizing Read Until either 1) require powerful computational resources that may not be available for portable sequencers or 2) lack scalability for large genomes, rendering them inaccurate or ineffective. We propose RawHash, the first mechanism that can accurately and efficiently perform real-time analysis of nanopore raw signals for large genomes using a hash-based similarity search. To enable this, RawHash ensures the signals corresponding to the same DNA content lead to the same hash value, regardless of the slight variations in these signals. RawHash achieves an accurate hash-based similarity search via an effective quantization of the raw signals such that signals corresponding to the same DNA content have the same quantized value and, subsequently, the same hash value. We evaluate RawHash on three applications: 1) read mapping, 2) relative abundance estimation, and 3) contamination analysis. 
Our evaluations show that RawHash is the only tool that can provide high accuracy and high throughput for analyzing large genomes in real-time. When compared to the state-of-the-art techniques, UNCALLED and Sigmap, RawHash provides 1) 25.8x and 3.4x better average throughput and 2) significantly better accuracy for large genomes, respectively. Source code is available at https://github.com/CMU-SAFARI/RawHash. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.09200v4-abstract-full').style.display = 'none'; document.getElementById('2301.09200v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in proceedings of ISMB/ECCB 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.04953">arXiv:2212.04953</a> <span> [<a href="https://arxiv.org/pdf/2212.04953">pdf</a>, <a href="https://arxiv.org/format/2212.04953">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" 
href="https://doi.org/10.3389/fgene.2024.1429306">10.3389/fgene.2024.1429306 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> TargetCall: Eliminating the Wasted Computation in Basecalling via Pre-Basecalling Filtering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Cavlak%2C+M+B">Meryem Banu Cavlak</a>, <a href="/search/q-bio?searchtype=author&query=Singh%2C+G">Gagandeep Singh</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Lindegger%2C+J">Joël Lindegger</a>, <a href="/search/q-bio?searchtype=author&query=Sadrosadati%2C+M">Mohammad Sadrosadati</a>, <a href="/search/q-bio?searchtype=author&query=Ghiasi%2C+N+M">Nika Mansouri Ghiasi</a>, <a href="/search/q-bio?searchtype=author&query=Alkan%2C+C">Can Alkan</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.04953v3-abstract-short" style="display: inline;"> Basecalling is an essential step in nanopore sequencing analysis where the raw signals of nanopore sequencers are converted into nucleotide sequences, i.e., reads. State-of-the-art basecallers employ complex deep learning models to achieve high basecalling accuracy. This makes basecalling computationally inefficient and memory-hungry, bottlenecking the entire genome analysis pipeline. 
However, for… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.04953v3-abstract-full').style.display = 'inline'; document.getElementById('2212.04953v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.04953v3-abstract-full" style="display: none;"> Basecalling is an essential step in nanopore sequencing analysis where the raw signals of nanopore sequencers are converted into nucleotide sequences, i.e., reads. State-of-the-art basecallers employ complex deep learning models to achieve high basecalling accuracy. This makes basecalling computationally inefficient and memory-hungry, bottlenecking the entire genome analysis pipeline. However, for many applications, the majority of reads do not match the reference genome of interest (i.e., target reference) and thus are discarded in later steps in the genomics pipeline, wasting the basecalling computation. To overcome this issue, we propose TargetCall, the first pre-basecalling filter to eliminate the wasted computation in basecalling. TargetCall's key idea is to discard reads that will not match the target reference (i.e., off-target reads) prior to basecalling. TargetCall consists of two main components: (1) LightCall, a lightweight neural network basecaller that produces noisy reads; and (2) Similarity Check, which labels each of these noisy reads as on-target or off-target by matching them to the target reference. Our thorough experimental evaluations show that TargetCall 1) improves the end-to-end basecalling runtime performance of the state-of-the-art basecaller by 3.31x while maintaining high (98.88%) recall in keeping on-target reads, 2) maintains high accuracy in downstream analysis, and 3) achieves better runtime performance, throughput, recall, precision, and generality compared to prior works. TargetCall is available at https://github.com/CMU-SAFARI/TargetCall. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.04953v3-abstract-full').style.display = 'none'; document.getElementById('2212.04953v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.03079">arXiv:2211.03079</a> <span> [<a href="https://arxiv.org/pdf/2211.03079">pdf</a>, <a href="https://arxiv.org/format/2211.03079">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> </div> <p class="title is-5 mathjax"> RUBICON: A Framework for Designing Efficient Deep Learning-Based Genomic Basecallers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Singh%2C+G">Gagandeep Singh</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Denolf%2C+K">Kristof Denolf</a>, <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Khodamoradi%2C+A">Alireza Khodamoradi</a>, <a href="/search/q-bio?searchtype=author&query=Cavlak%2C+M+B">Meryem Banu Cavlak</a>, <a href="/search/q-bio?searchtype=author&query=Corporaal%2C+H">Henk Corporaal</a>, <a 
href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.03079v7-abstract-short" style="display: inline;"> Nanopore sequencing generates noisy electrical signals that need to be converted into a standard string of DNA nucleotide bases using a computational step called basecalling. The accuracy and speed of basecalling have critical implications for all later steps in genome analysis. Many researchers adopt complex deep learning-based models to perform basecalling without considering the compute demands… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.03079v7-abstract-full').style.display = 'inline'; document.getElementById('2211.03079v7-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.03079v7-abstract-full" style="display: none;"> Nanopore sequencing generates noisy electrical signals that need to be converted into a standard string of DNA nucleotide bases using a computational step called basecalling. The accuracy and speed of basecalling have critical implications for all later steps in genome analysis. Many researchers adopt complex deep learning-based models to perform basecalling without considering the compute demands of such models, which leads to slow, inefficient, and memory-hungry basecallers. Therefore, there is a need to reduce the computation and memory cost of basecalling while maintaining accuracy. Our goal is to develop a comprehensive framework for creating deep learning-based basecallers that provide high efficiency and performance. We introduce RUBICON, a framework to develop hardware-optimized basecallers. RUBICON consists of two novel machine-learning techniques that are specifically designed for basecalling. 
First, we introduce the first quantization-aware basecalling neural architecture search (QABAS) framework to specialize the basecalling neural network architecture for a given hardware acceleration platform while jointly exploring and finding the best bit-width precision for each neural network layer. Second, we develop SkipClip, the first technique to remove the skip connections present in modern basecallers to greatly reduce resource and storage requirements without any loss in basecalling accuracy. We demonstrate the benefits of RUBICON by developing RUBICALL, the first hardware-optimized basecaller that performs fast and accurate basecalling. Compared to the fastest state-of-the-art basecaller, RUBICALL provides a 3.96x speedup with 2.97% higher accuracy. We show that RUBICON helps researchers develop hardware-optimized basecallers that are superior to expert-designed models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.03079v7-abstract-full').style.display = 'none'; document.getElementById('2211.03079v7-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.09765">arXiv:2207.09765</a> <span> [<a href="https://arxiv.org/pdf/2207.09765">pdf</a>, <a href="https://arxiv.org/format/2207.09765">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> ApHMM: Accelerating Profile Hidden Markov Models for Fast and Energy-Efficient Genome Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Pillai%2C+K">Kamlesh Pillai</a>, <a href="/search/q-bio?searchtype=author&query=Kalsi%2C+G+S">Gurpreet S. 
Kalsi</a>, <a href="/search/q-bio?searchtype=author&query=Suresh%2C+B">Bharathwaj Suresh</a>, <a href="/search/q-bio?searchtype=author&query=Cali%2C+D+S">Damla Senol Cali</a>, <a href="/search/q-bio?searchtype=author&query=Kim%2C+J">Jeremie Kim</a>, <a href="/search/q-bio?searchtype=author&query=Shahroodi%2C+T">Taha Shahroodi</a>, <a href="/search/q-bio?searchtype=author&query=Cavlak%2C+M+B">Meryem Banu Cavlak</a>, <a href="/search/q-bio?searchtype=author&query=Lindegger%2C+J">Joel Lindegger</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Luna%2C+J+G">Juan Gómez Luna</a>, <a href="/search/q-bio?searchtype=author&query=Subramoney%2C+S">Sreenivas Subramoney</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.09765v2-abstract-short" style="display: inline;"> Profile hidden Markov models (pHMMs) are widely employed in various bioinformatics applications to identify similarities between biological sequences, such as DNA or protein sequences. In pHMMs, sequences are represented as graph structures. These probabilities are subsequently used to compute the similarity score between a sequence and a pHMM graph. The Baum-Welch algorithm, a prevalent and highl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.09765v2-abstract-full').style.display = 'inline'; document.getElementById('2207.09765v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.09765v2-abstract-full" style="display: none;"> Profile hidden Markov models (pHMMs) are widely employed in various bioinformatics applications to identify similarities between biological sequences, such as DNA or protein sequences. 
In pHMMs, sequences are represented as graph structures. These probabilities are subsequently used to compute the similarity score between a sequence and a pHMM graph. The Baum-Welch algorithm, a prevalent and highly accurate method, utilizes these probabilities to optimize and compute similarity scores. However, the Baum-Welch algorithm is computationally intensive, and existing solutions offer either software-only or hardware-only approaches with fixed pHMM designs. We identify an urgent need for a flexible, high-performance, and energy-efficient HW/SW co-design to address the major inefficiencies in the Baum-Welch algorithm for pHMMs. We introduce ApHMM, the first flexible acceleration framework designed to significantly reduce both computational and energy overheads associated with the Baum-Welch algorithm for pHMMs. ApHMM tackles the major inefficiencies in the Baum-Welch algorithm by 1) designing flexible hardware to accommodate various pHMM designs, 2) exploiting predictable data dependency patterns through on-chip memory with memoization techniques, 3) rapidly filtering out negligible computations using a hardware-based filter, and 4) minimizing redundant computations. ApHMM achieves substantial speedups of 15.55x - 260.03x, 1.83x - 5.34x, and 27.97x when compared to CPU, GPU, and FPGA implementations of the Baum-Welch algorithm, respectively. ApHMM outperforms state-of-the-art CPU implementations in three key bioinformatics applications: 1) error correction, 2) protein family search, and 3) multiple sequence alignment, by 1.29x - 59.94x, 1.03x - 1.75x, and 1.03x - 1.95x, respectively, while improving their energy efficiency by 64.24x - 115.46x, 1.75x, 1.96x. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.09765v2-abstract-full').style.display = 'none'; document.getElementById('2207.09765v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ACM TACO</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.05883">arXiv:2205.05883</a> <span> [<a href="https://arxiv.org/pdf/2205.05883">pdf</a>, <a href="https://arxiv.org/format/2205.05883">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3470496.3527436">10.1145/3470496.3527436 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> SeGraM: A Universal Hardware Accelerator for Genomic Sequence-to-Graph and Sequence-to-Sequence Mapping </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Cali%2C+D+S">Damla Senol Cali</a>, <a href="/search/q-bio?searchtype=author&query=Kanellopoulos%2C+K">Konstantinos Kanellopoulos</a>, <a 
href="/search/q-bio?searchtype=author&query=Lindegger%2C+J">Joel Lindegger</a>, <a href="/search/q-bio?searchtype=author&query=Bing%C3%B6l%2C+Z">Zülal Bingöl</a>, <a href="/search/q-bio?searchtype=author&query=Kalsi%2C+G+S">Gurpreet S. Kalsi</a>, <a href="/search/q-bio?searchtype=author&query=Zuo%2C+Z">Ziyi Zuo</a>, <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Cavlak%2C+M+B">Meryem Banu Cavlak</a>, <a href="/search/q-bio?searchtype=author&query=Kim%2C+J">Jeremie Kim</a>, <a href="/search/q-bio?searchtype=author&query=Ghiasi%2C+N+M">Nika Mansouri Ghiasi</a>, <a href="/search/q-bio?searchtype=author&query=Singh%2C+G">Gagandeep Singh</a>, <a href="/search/q-bio?searchtype=author&query=G%C3%B3mez-Luna%2C+J">Juan Gómez-Luna</a>, <a href="/search/q-bio?searchtype=author&query=Alserr%2C+N+A">Nour Almadhoun Alserr</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Subramoney%2C+S">Sreenivas Subramoney</a>, <a href="/search/q-bio?searchtype=author&query=Alkan%2C+C">Can Alkan</a>, <a href="/search/q-bio?searchtype=author&query=Ghose%2C+S">Saugata Ghose</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.05883v2-abstract-short" style="display: inline;"> A critical step of genome sequence analysis is the mapping of sequenced DNA fragments (i.e., reads) collected from an individual to a known linear reference genome sequence (i.e., sequence-to-sequence mapping). 
Recent works replace the linear reference sequence with a graph-based representation of the reference genome, which captures the genetic variations and diversity across many individuals in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.05883v2-abstract-full').style.display = 'inline'; document.getElementById('2205.05883v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.05883v2-abstract-full" style="display: none;"> A critical step of genome sequence analysis is the mapping of sequenced DNA fragments (i.e., reads) collected from an individual to a known linear reference genome sequence (i.e., sequence-to-sequence mapping). Recent works replace the linear reference sequence with a graph-based representation of the reference genome, which captures the genetic variations and diversity across many individuals in a population. Mapping reads to the graph-based reference genome (i.e., sequence-to-graph mapping) results in notable quality improvements in genome analysis. Unfortunately, while sequence-to-sequence mapping is well studied with many available tools and accelerators, sequence-to-graph mapping is a more difficult computational problem, with a much smaller number of practical software tools currently available. We analyze two state-of-the-art sequence-to-graph mapping tools and reveal four key issues. We find that there is a pressing need to have a specialized, high-performance, scalable, and low-cost algorithm/hardware co-design that alleviates bottlenecks in both the seeding and alignment steps of sequence-to-graph mapping. To this end, we propose SeGraM, a universal algorithm/hardware co-designed genomic mapping accelerator that can effectively and efficiently support both sequence-to-graph mapping and sequence-to-sequence mapping, for both short and long reads. 
To our knowledge, SeGraM is the first algorithm/hardware co-design for accelerating sequence-to-graph mapping. SeGraM consists of two main components: (1) MinSeed, the first minimizer-based seeding accelerator; and (2) BitAlign, the first bitvector-based sequence-to-graph alignment accelerator. We demonstrate that SeGraM provides significant improvements for multiple steps of the sequence-to-graph and sequence-to-sequence mapping pipelines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.05883v2-abstract-full').style.display = 'none'; document.getElementById('2205.05883v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in ISCA'22</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2201.06255">arXiv:2201.06255</a> <span> [<a href="https://arxiv.org/pdf/2201.06255">pdf</a>, <a href="https://arxiv.org/format/2201.06255">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1093/bioinformatics/btac554">10.1093/bioinformatics/btac554 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> 
FastRemap: A Tool for Quickly Remapping Reads between Genome Assemblies </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Kim%2C+J+S">Jeremie S. Kim</a>, <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Cavlak%2C+M+B">Meryem Banu Cavlak</a>, <a href="/search/q-bio?searchtype=author&query=Cali%2C+D+S">Damla Senol Cali</a>, <a href="/search/q-bio?searchtype=author&query=Alkan%2C+C">Can Alkan</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2201.06255v3-abstract-short" style="display: inline;"> A genome read data set can be quickly and efficiently remapped from one reference to another similar reference (e.g., between two reference versions or two similar species) using a variety of tools, e.g., the commonly-used CrossMap tool. With the explosion of available genomic data sets and references, high-performance remapping tools will be even more important for keeping up with the computation… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.06255v3-abstract-full').style.display = 'inline'; document.getElementById('2201.06255v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2201.06255v3-abstract-full" style="display: none;"> A genome read data set can be quickly and efficiently remapped from one reference to another similar reference (e.g., between two reference versions or two similar species) using a variety of tools, e.g., the commonly-used CrossMap tool. 
With the explosion of available genomic data sets and references, high-performance remapping tools will be even more important for keeping up with the computational demands of genome assembly and analysis. We provide FastRemap, a fast and efficient tool for remapping reads between genome assemblies. FastRemap provides up to a 7.82$\times$ speedup (6.47$\times$, on average) and uses as low as 61.7% (80.7%, on average) of the peak memory consumption compared to the state-of-the-art remapping tool, CrossMap. FastRemap is written in C++. The source code and user manual are freely available at: github.com/CMU-SAFARI/FastRemap. Docker image available at: https://hub.docker.com/r/alkanlab/fast. Also available in Bioconda at: https://anaconda.org/bioconda/fastremap-bio. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.06255v3-abstract-full').style.display = 'none'; document.getElementById('2201.06255v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 January, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">FastRemap is open source and all scripts needed to replicate the results in this paper can be found at https://github.com/CMU-SAFARI/FastRemap</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Bioinformatics, Sep 30; 38(19):4633-4635, 2022 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1912.08735">arXiv:1912.08735</a> <span> [<a href="https://arxiv.org/pdf/1912.08735">pdf</a>, <a href="https://arxiv.org/format/1912.08735">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> </div> </div> <p class="title is-5 mathjax"> AirLift: A Fast and Comprehensive Technique for Remapping Alignments between Reference Genomes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&query=Kim%2C+J+S">Jeremie S. 
Kim</a>, <a href="/search/q-bio?searchtype=author&query=Firtina%2C+C">Can Firtina</a>, <a href="/search/q-bio?searchtype=author&query=Cavlak%2C+M+B">Meryem Banu Cavlak</a>, <a href="/search/q-bio?searchtype=author&query=Cali%2C+D+S">Damla Senol Cali</a>, <a href="/search/q-bio?searchtype=author&query=Alser%2C+M">Mohammed Alser</a>, <a href="/search/q-bio?searchtype=author&query=Hajinazar%2C+N">Nastaran Hajinazar</a>, <a href="/search/q-bio?searchtype=author&query=Alkan%2C+C">Can Alkan</a>, <a href="/search/q-bio?searchtype=author&query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1912.08735v5-abstract-short" style="display: inline;"> AirLift is the first read remapping tool that enables users to quickly and comprehensively map a read set, that had been previously mapped to one reference genome, to another similar reference. Users can then quickly run a downstream analysis of read sets for each latest reference release. Compared to the state-of-the-art method for remapping reads (i.e., full mapping), AirLift reduces the overall… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1912.08735v5-abstract-full').style.display = 'inline'; document.getElementById('1912.08735v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1912.08735v5-abstract-full" style="display: none;"> AirLift is the first read remapping tool that enables users to quickly and comprehensively map a read set, that had been previously mapped to one reference genome, to another similar reference. Users can then quickly run a downstream analysis of read sets for each latest reference release. 
Compared to the state-of-the-art method for remapping reads (i.e., full mapping), AirLift reduces the overall execution time to remap read sets between two reference genome versions by up to 27.4x. We validate our remapping results with GATK and find that AirLift provides high accuracy in identifying ground truth SNP/INDEL variants. AirLift source code and readme describing how to reproduce our results are available at https://github.com/CMU-SAFARI/AirLift. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1912.08735v5-abstract-full').style.display = 'none'; document.getElementById('1912.08735v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 December, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2019. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in the IEEE/ACM TCBB journal: https://ieeexplore.ieee.org/document/10638724</span> </p> </li> </ol> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a 
href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 
47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository