CINXE.COM

Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script 
src="https://static.arxiv.org/MathJax-2.7.3/MathJax.js"></script>
<!-- MathJax is requested over explicit https, like the other static.arxiv.org assets on this page -->
<script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script>
<link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" />
<link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" />
<script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script>
<script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script>
<style> radio#cf-customfield_11400 { display: none; } </style>
</head>
<body>
<header><a href="#main-container" class="is-sr-only">Skip to main content</a>
<!-- contains Cornell logo and sponsor statement -->
<div class="attribution level is-marginless" role="banner">
<div class="level-left">
<!-- the image's alt text is the accessible name of this link; an aria-label on the image would override it -->
<a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" /></a>
</div>
<div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors.
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" 
role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header>
<main class="container" id="main-container">
<div class="level is-marginless">
<div class="level-left">
<h1 class="title is-clearfix"> Showing 1&ndash;27 of 27 results for author: <span class="mathjax">Song, Y S</span> </h1>
</div>
<div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div>
</div>
<div class="content">
<!-- archive-scoped search form; role="search" marks it as a search landmark -->
<form method="GET" action="/search/q-bio" role="search"> Searching in archive <strong>q-bio</strong>. <a href="/search/?searchtype=author&amp;query=Song%2C+Y+S">Search in all archives.</a>
<div class="field has-addons-tablet">
<div class="control is-expanded">
<label for="query" class="hidden-label">Search term or terms</label>
<input class="input is-medium" id="query" name="query" placeholder="Search term..."
type="text" value="Song, Y S"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Song%2C+Y+S&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option 
value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Song, Y S"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.11435">arXiv:2407.11435</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.11435">pdf</a>, <a href="https://arxiv.org/format/2407.11435">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Genomic Language Models: Opportunities and Challenges </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Benegas%2C+G">Gonzalo Benegas</a>, <a href="/search/q-bio?searchtype=author&amp;query=Ye%2C+C">Chengzhong Ye</a>, <a href="/search/q-bio?searchtype=author&amp;query=Albors%2C+C">Carlos Albors</a>, <a href="/search/q-bio?searchtype=author&amp;query=Li%2C+J+C">Jianan Canal Li</a>, <a href="/search/q-bio?searchtype=author&amp;query=Song%2C+Y+S">Yun S. 
Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.11435v2-abstract-short" style="display: inline;"> Large language models (LLMs) are having transformative impacts across a wide range of scientific fields, particularly in the biomedical sciences. Just as the goal of Natural Language Processing is to understand sequences of words, a major objective in biology is to understand biological sequences. Genomic Language Models (gLMs), which are LLMs trained on DNA sequences, have the potential to signif&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.11435v2-abstract-full').style.display = 'inline'; document.getElementById('2407.11435v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.11435v2-abstract-full" style="display: none;"> Large language models (LLMs) are having transformative impacts across a wide range of scientific fields, particularly in the biomedical sciences. Just as the goal of Natural Language Processing is to understand sequences of words, a major objective in biology is to understand biological sequences. Genomic Language Models (gLMs), which are LLMs trained on DNA sequences, have the potential to significantly advance our understanding of genomes and how DNA elements at various scales interact to give rise to complex functions. To showcase this potential, we highlight key applications of gLMs, including functional constraint prediction, sequence design, and transfer learning. Despite notable recent progress, however, developing effective and efficient gLMs presents numerous challenges, especially for species with large, complex genomes. Here, we discuss major considerations for developing and evaluating gLMs. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.11435v2-abstract-full').style.display = 'none'; document.getElementById('2407.11435v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Review article; 26 pages, 3 figures, 1 table</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 92-08; 92B20; 68T50; 68T07 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.17153">arXiv:2402.17153</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.17153">pdf</a>, <a href="https://arxiv.org/format/2402.17153">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> </div> </div> <p class="title is-5 mathjax"> Exact and efficient phylodynamic simulation from arbitrarily large populations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Celentano%2C+M">Michael Celentano</a>, <a href="/search/q-bio?searchtype=author&amp;query=DeWitt%2C+W+S">William S. DeWitt</a>, <a href="/search/q-bio?searchtype=author&amp;query=Prillo%2C+S">Sebastian Prillo</a>, <a href="/search/q-bio?searchtype=author&amp;query=Song%2C+Y+S">Yun S. 
Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.17153v2-abstract-short" style="display: inline;"> Many biological studies involve inferring the evolutionary history of a sample of individuals from a large population and interpreting the reconstructed tree. Such an ascertained tree typically represents only a small part of a comprehensive population tree and is distorted by survivorship and sampling biases. Inferring evolutionary parameters from ascertained trees requires modeling both the unde&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.17153v2-abstract-full').style.display = 'inline'; document.getElementById('2402.17153v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.17153v2-abstract-full" style="display: none;"> Many biological studies involve inferring the evolutionary history of a sample of individuals from a large population and interpreting the reconstructed tree. Such an ascertained tree typically represents only a small part of a comprehensive population tree and is distorted by survivorship and sampling biases. Inferring evolutionary parameters from ascertained trees requires modeling both the underlying population dynamics and the ascertainment process. A crucial component of this phylodynamic modeling involves tree simulation, which is used to benchmark probabilistic inference methods. To simulate an ascertained tree, one must first simulate the full population tree and then prune unobserved lineages. Consequently, the computational cost is determined not by the size of the final simulated tree, but by the size of the population tree in which it is embedded. 
In most biological scenarios, simulations of the entire population are prohibitively expensive due to computational demands placed on lineages without sampled descendants. Here, we address this challenge by proving that, for any partially ascertained process from a general multi-type birth-death-mutation-sampling model, there exists an equivalent process with complete sampling and no death, a property which we leverage to develop a highly efficient algorithm for simulating trees. Our algorithm scales linearly with the size of the final simulated tree and is independent of the population size, enabling simulations from extremely large populations beyond the reach of current methods but essential for various biological applications. We anticipate that this unprecedented speedup will significantly advance the development of novel inference methods that require extensive training data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.17153v2-abstract-full').style.display = 'none'; document.getElementById('2402.17153v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">37 pages, 7 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 92-10; 92D15; 60J80 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.15261">arXiv:2109.15261</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2109.15261">pdf</a>, <a href="https://arxiv.org/format/2109.15261">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Probability">math.PR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Statistics Theory">math.ST</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> A simple and flexible test of sample exchangeability with applications to statistical genomics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Aw%2C+A+J">Alan J. Aw</a>, <a href="/search/q-bio?searchtype=author&amp;query=Spence%2C+J+P">Jeffrey P. Spence</a>, <a href="/search/q-bio?searchtype=author&amp;query=Song%2C+Y+S">Yun S. Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.15261v3-abstract-short" style="display: inline;"> In scientific studies involving analyses of multivariate data, basic but important questions often arise for the researcher: Is the sample exchangeable, meaning that the joint distribution of the sample is invariant to the ordering of the units? 
Are the features independent of one another, or perhaps the features can be grouped so that the groups are mutually independent? In statistical genomics,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.15261v3-abstract-full').style.display = 'inline'; document.getElementById('2109.15261v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.15261v3-abstract-full" style="display: none;"> In scientific studies involving analyses of multivariate data, basic but important questions often arise for the researcher: Is the sample exchangeable, meaning that the joint distribution of the sample is invariant to the ordering of the units? Are the features independent of one another, or perhaps the features can be grouped so that the groups are mutually independent? In statistical genomics, these considerations are fundamental to downstream tasks such as demographic inference and the construction of polygenic risk scores. We propose a non-parametric approach, which we call the V test, to address these two questions, namely, a test of sample exchangeability given dependency structure of features, and a test of feature independence given sample exchangeability. Our test is conceptually simple, yet fast and flexible. It controls the Type I error across realistic scenarios, and handles data of arbitrary dimensions by leveraging large-sample asymptotics. Through extensive simulations and a comparison against unsupervised tests of stratification based on random matrix theory, we find that our test compares favorably in various scenarios of interest. We apply the test to data from the 1000 Genomes Project, demonstrating how it can be employed to assess exchangeability of the genetic sample, or find optimal linkage disequilibrium (LD) splits for downstream analysis. 
For exchangeability assessment, we find that removing rare variants can substantially increase the p-value of the test statistic. For optimal LD splitting, the V test reports different optimal splits than previous approaches not relying on hypothesis testing. Software for our methods is available in R (CRAN: flintyR) and Python (PyPI: flintyPy). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.15261v3-abstract-full').style.display = 'none'; document.getElementById('2109.15261v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">24 pages. 
Supplementary Information file (38 pages, contains mathematical proofs) is available at https://github.com/songlab-cal/flinty/</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 62G10; 62H15; 62P10 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> G.3 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2105.10590">arXiv:2105.10590</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2105.10590">pdf</a>, <a href="https://arxiv.org/format/2105.10590">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Parallelizing Contextual Bandits </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Chan%2C+J">Jeffrey Chan</a>, <a href="/search/q-bio?searchtype=author&amp;query=Pacchiano%2C+A">Aldo Pacchiano</a>, <a href="/search/q-bio?searchtype=author&amp;query=Tripuraneni%2C+N">Nilesh Tripuraneni</a>, <a href="/search/q-bio?searchtype=author&amp;query=Song%2C+Y+S">Yun S. Song</a>, <a href="/search/q-bio?searchtype=author&amp;query=Bartlett%2C+P">Peter Bartlett</a>, <a href="/search/q-bio?searchtype=author&amp;query=Jordan%2C+M+I">Michael I. 
Jordan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2105.10590v2-abstract-short" style="display: inline;"> Standard approaches to decision-making under uncertainty focus on sequential exploration of the space of decisions. However, \textit{simultaneously} proposing a batch of decisions, which leverages available resources for parallel experimentation, has the potential to rapidly accelerate exploration. We present a family of (parallel) contextual bandit algorithms applicable to problems with bounded e&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.10590v2-abstract-full').style.display = 'inline'; document.getElementById('2105.10590v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2105.10590v2-abstract-full" style="display: none;"> Standard approaches to decision-making under uncertainty focus on sequential exploration of the space of decisions. However, \textit{simultaneously} proposing a batch of decisions, which leverages available resources for parallel experimentation, has the potential to rapidly accelerate exploration. We present a family of (parallel) contextual bandit algorithms applicable to problems with bounded eluder dimension whose regret is nearly identical to their perfectly sequential counterparts -- given access to the same total number of oracle queries -- up to a lower-order ``burn-in&#34; term. We further show these algorithms can be specialized to the class of linear reward functions where we introduce and analyze several new linear bandit algorithms which explicitly introduce diversity into their action selection. 
Finally, we also present an empirical evaluation of these parallel algorithms in several domains, including materials discovery and biological sequence design problems, to demonstrate the utility of parallelized bandits in practical settings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.10590v2-abstract-full').style.display = 'none'; document.getElementById('2105.10590v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 May, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.13034">arXiv:2010.13034</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2010.13034">pdf</a>, <a href="https://arxiv.org/format/2010.13034">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biological Physics">physics.bio-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Analysis, Statistics and Probability">physics.data-an</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.bpj.2021.02.004">10.1016/j.bpj.2021.02.004 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> EGGTART: A computational tool to visualize the dynamics of biophysical transport processes under the inhomogeneous 
$\ell$-TASEP </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Erdmann-Pham%2C+D+D">Dan D. Erdmann-Pham</a>, <a href="/search/q-bio?searchtype=author&amp;query=Son%2C+W">Wonjun Son</a>, <a href="/search/q-bio?searchtype=author&amp;query=Duc%2C+K+D">Khanh Dao Duc</a>, <a href="/search/q-bio?searchtype=author&amp;query=Song%2C+Y+S">Yun S. Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.13034v1-abstract-short" style="display: inline;"> The totally asymmetric simple exclusion process (TASEP), which describes the stochastic dynamics of interacting particles on a lattice, has been actively studied over the past several decades and applied to model important biological transport processes. Here we present a software package, called EGGTART (Extensive GUI gives TASEP-realization in real time), which quantifies and visualizes the dyna&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.13034v1-abstract-full').style.display = 'inline'; document.getElementById('2010.13034v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.13034v1-abstract-full" style="display: none;"> The totally asymmetric simple exclusion process (TASEP), which describes the stochastic dynamics of interacting particles on a lattice, has been actively studied over the past several decades and applied to model important biological transport processes. Here we present a software package, called EGGTART (Extensive GUI gives TASEP-realization in real time), which quantifies and visualizes the dynamics associated with a generalized version of the TASEP with an extended particle size and heterogeneous jump rates. 
This computational tool is based on analytic formulas obtained from deriving and solving the hydrodynamic limit of the process. It allows an immediate quantification of the particle density, flux, and phase diagram, as a function of a few key parameters associated with the system, which would be difficult to achieve via conventional stochastic simulations. Our software should therefore be of interest to biophysicists studying general transport processes, and can in particular be used in the context of gene expression to model and quantify mRNA translation of different coding sequences. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.13034v1-abstract-full').style.display = 'none'; document.getElementById('2010.13034v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1906.08230">arXiv:1906.08230</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1906.08230">pdf</a>, <a href="https://arxiv.org/format/1906.08230">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Evaluating Protein Transfer Learning with TAPE </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Rao%2C+R">Roshan Rao</a>, <a href="/search/q-bio?searchtype=author&amp;query=Bhattacharya%2C+N">Nicholas Bhattacharya</a>, <a href="/search/q-bio?searchtype=author&amp;query=Thomas%2C+N">Neil Thomas</a>, <a href="/search/q-bio?searchtype=author&amp;query=Duan%2C+Y">Yan Duan</a>, <a href="/search/q-bio?searchtype=author&amp;query=Chen%2C+X">Xi Chen</a>, <a href="/search/q-bio?searchtype=author&amp;query=Canny%2C+J">John Canny</a>, <a href="/search/q-bio?searchtype=author&amp;query=Abbeel%2C+P">Pieter Abbeel</a>, <a href="/search/q-bio?searchtype=author&amp;query=Song%2C+Y+S">Yun S. Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1906.08230v1-abstract-short" style="display: inline;"> Protein modeling is an increasingly popular area of machine learning research. 
Semi-supervised learning has emerged as an important paradigm in protein modeling due to the high cost of acquiring supervised protein labels, but the current literature is fragmented when it comes to datasets and standardized evaluation techniques. To facilitate progress in this field, we introduce the Tasks Assessing&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1906.08230v1-abstract-full').style.display = 'inline'; document.getElementById('1906.08230v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1906.08230v1-abstract-full" style="display: none;"> Protein modeling is an increasingly popular area of machine learning research. Semi-supervised learning has emerged as an important paradigm in protein modeling due to the high cost of acquiring supervised protein labels, but the current literature is fragmented when it comes to datasets and standardized evaluation techniques. To facilitate progress in this field, we introduce the Tasks Assessing Protein Embeddings (TAPE), a set of five biologically relevant semi-supervised learning tasks spread across different domains of protein biology. We curate tasks into specific training, validation, and test splits to ensure that each task tests biologically relevant generalization that transfers to real-life scenarios. We benchmark a range of approaches to semi-supervised protein representation learning, which span recent work as well as canonical sequence learning techniques. We find that self-supervised pretraining is helpful for almost all models on all tasks, more than doubling performance in some cases. Despite this increase, in several cases features learned by self-supervised pretraining still lag behind features extracted by state-of-the-art non-neural techniques. 
This gap in performance suggests a huge opportunity for innovative architecture design and improved modeling paradigms that better capture the signal in biological sequences. TAPE will help the machine learning community focus effort on scientifically relevant problems. Toward this end, all data and code used to run these experiments are available at https://github.com/songlab-cal/tape. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1906.08230v1-abstract-full').style.display = 'none'; document.getElementById('1906.08230v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 June, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1807.02763">arXiv:1807.02763</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1807.02763">pdf</a>, <a href="https://arxiv.org/format/1807.02763">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> </div> </div> <p class="title is-5 mathjax"> Inference of Population History using Coalescent HMMs: Review and Outlook </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Spence%2C+J+P">Jeffrey P. 
Spence</a>, <a href="/search/q-bio?searchtype=author&amp;query=Steinr%C3%BCcken%2C+M">Matthias Steinrücken</a>, <a href="/search/q-bio?searchtype=author&amp;query=Terhorst%2C+J">Jonathan Terhorst</a>, <a href="/search/q-bio?searchtype=author&amp;query=Song%2C+Y+S">Yun S. Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1807.02763v1-abstract-short" style="display: inline;"> Studying how diverse human populations are related is of historical and anthropological interest, in addition to providing a realistic null model for testing for signatures of natural selection or disease associations. Furthermore, understanding the demographic histories of other species is playing an increasingly important role in conservation genetics. A number of statistical methods have been d&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1807.02763v1-abstract-full').style.display = 'inline'; document.getElementById('1807.02763v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1807.02763v1-abstract-full" style="display: none;"> Studying how diverse human populations are related is of historical and anthropological interest, in addition to providing a realistic null model for testing for signatures of natural selection or disease associations. Furthermore, understanding the demographic histories of other species is playing an increasingly important role in conservation genetics. A number of statistical methods have been developed to infer population demographic histories using whole-genome sequence data, with recent advances focusing on allowing for more flexible modeling choices, scaling to larger data sets, and increasing statistical power. 
Here we review coalescent hidden Markov models, a powerful class of population genetic inference methods that can effectively utilize linkage disequilibrium information. We highlight recent advances, give advice for practitioners, point out potential pitfalls, and present possible future research directions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1807.02763v1-abstract-full').style.display = 'none'; document.getElementById('1807.02763v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 July, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 2 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1803.05609">arXiv:1803.05609</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1803.05609">pdf</a>, <a href="https://arxiv.org/format/1803.05609">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Mathematical Physics">math-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Statistical Mechanics">cond-mat.stat-mech</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Probability">math.PR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.cels.2019.12.003">10.1016/j.cels.2019.12.003 <i class="fa 
fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> The key parameters that govern translation efficiency </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Erdmann-Pham%2C+D+D">Dan D. Erdmann-Pham</a>, <a href="/search/q-bio?searchtype=author&amp;query=Duc%2C+K+D">Khanh Dao Duc</a>, <a href="/search/q-bio?searchtype=author&amp;query=Song%2C+Y+S">Yun S. Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1803.05609v2-abstract-short" style="display: inline;"> Translation of mRNA into protein is a fundamental yet complex biological process with multiple factors that can potentially affect its efficiency. Here, we study a stochastic model describing the traffic flow of ribosomes along the mRNA (namely, the inhomogeneous $\ell$-TASEP), and identify the key parameters that govern the overall rate of protein synthesis, sensitivity to initiation rate changes&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1803.05609v2-abstract-full').style.display = 'inline'; document.getElementById('1803.05609v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1803.05609v2-abstract-full" style="display: none;"> Translation of mRNA into protein is a fundamental yet complex biological process with multiple factors that can potentially affect its efficiency. Here, we study a stochastic model describing the traffic flow of ribosomes along the mRNA (namely, the inhomogeneous $\ell$-TASEP), and identify the key parameters that govern the overall rate of protein synthesis, sensitivity to initiation rate changes, and efficiency of ribosome usage. 
By analyzing a continuum limit of the model, we obtain closed-form expressions for stationary currents and ribosomal densities, which agree well with Monte Carlo simulations. Furthermore, we completely characterize the phase transitions in the system, and by applying our theoretical results, we formulate design principles that detail how to tune the key parameters we identified to optimize translation efficiency. Using ribosome profiling data from S. cerevisiae, we show that its translation system is generally consistent with these principles. Our theoretical results have implications for evolutionary biology, as well as synthetic biology. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1803.05609v2-abstract-full').style.display = 'none'; document.getElementById('1803.05609v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 January, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 March, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in Cell Systems. 
32 pages, 10 figures, 1 table</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1802.06153">arXiv:1802.06153</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1802.06153">pdf</a>, <a href="https://arxiv.org/format/1802.06153">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> A Likelihood-Free Inference Framework for Population Genetic Data using Exchangeable Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Chan%2C+J">Jeffrey Chan</a>, <a href="/search/q-bio?searchtype=author&amp;query=Perrone%2C+V">Valerio Perrone</a>, <a href="/search/q-bio?searchtype=author&amp;query=Spence%2C+J+P">Jeffrey P. Spence</a>, <a href="/search/q-bio?searchtype=author&amp;query=Jenkins%2C+P+A">Paul A. Jenkins</a>, <a href="/search/q-bio?searchtype=author&amp;query=Mathieson%2C+S">Sara Mathieson</a>, <a href="/search/q-bio?searchtype=author&amp;query=Song%2C+Y+S">Yun S. Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1802.06153v2-abstract-short" style="display: inline;"> An explosion of high-throughput DNA sequencing in the past decade has led to a surge of interest in population-scale inference with whole-genome data. 
Recent work in population genetics has centered on designing inference methods for relatively simple model classes, and few scalable general-purpose inference techniques exist for more realistic, complex models. To achieve this, two inferential chal&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1802.06153v2-abstract-full').style.display = 'inline'; document.getElementById('1802.06153v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1802.06153v2-abstract-full" style="display: none;"> An explosion of high-throughput DNA sequencing in the past decade has led to a surge of interest in population-scale inference with whole-genome data. Recent work in population genetics has centered on designing inference methods for relatively simple model classes, and few scalable general-purpose inference techniques exist for more realistic, complex models. To achieve this, two inferential challenges need to be addressed: (1) population data are exchangeable, calling for methods that efficiently exploit the symmetries of the data, and (2) computing likelihoods is intractable as it requires integrating over a set of correlated, extremely high-dimensional latent variables. These challenges are traditionally tackled by likelihood-free methods that use scientific simulators to generate datasets and reduce them to hand-designed, permutation-invariant summary statistics, often leading to inaccurate inference. In this work, we develop an exchangeable neural network that performs summary statistic-free, likelihood-free inference. Our framework can be applied in a black-box fashion across a variety of simulation-based tasks, both within and outside biology. We demonstrate the power of our approach on the recombination hotspot testing problem, outperforming the state-of-the-art. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1802.06153v2-abstract-full').style.display = 'none'; document.getElementById('1802.06153v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2018; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 February, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1712.05035">arXiv:1712.05035</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1712.05035">pdf</a>, <a href="https://arxiv.org/format/1712.05035">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Algebraic Geometry">math.AG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Statistics Theory">math.ST</span> </div> </div> <p class="title is-5 mathjax"> Geometry of the sample frequency spectrum and the perils of demographic inference </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Rosen%2C+Z">Zvi Rosen</a>, <a href="/search/q-bio?searchtype=author&amp;query=Bhaskar%2C+A">Anand Bhaskar</a>, <a href="/search/q-bio?searchtype=author&amp;query=Roch%2C+S">Sebastien Roch</a>, <a href="/search/q-bio?searchtype=author&amp;query=Song%2C+Y+S">Yun S. 
Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1712.05035v1-abstract-short" style="display: inline;"> The sample frequency spectrum (SFS), which describes the distribution of mutant alleles in a sample of DNA sequences, is a widely used summary statistic in population genetics. The expected SFS has a strong dependence on the historical population demography and this property is exploited by popular statistical methods to infer complex demographic histories from DNA sequence data. Most, if not all,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1712.05035v1-abstract-full').style.display = 'inline'; document.getElementById('1712.05035v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1712.05035v1-abstract-full" style="display: none;"> The sample frequency spectrum (SFS), which describes the distribution of mutant alleles in a sample of DNA sequences, is a widely used summary statistic in population genetics. The expected SFS has a strong dependence on the historical population demography and this property is exploited by popular statistical methods to infer complex demographic histories from DNA sequence data. Most, if not all, of these inference methods exhibit pathological behavior, however. Specifically, they often display runaway behavior in optimization, where the inferred population sizes and epoch durations can degenerate to 0 or diverge to infinity, and show undesirable sensitivity of the inferred demography to perturbations in the data. The goal of this paper is to provide theoretical insights into why such problems arise. 
To this end, we characterize the geometry of the expected SFS for piecewise-constant demographic histories and use our results to show that the aforementioned pathological behavior of popular inference methods is intrinsic to the geometry of the expected SFS. We provide explicit descriptions and visualizations for a toy model with sample size 4, and generalize our intuition to arbitrary sample sizes n using tools from convex and algebraic geometry. We also develop a universal characterization result which shows that the expected SFS of a sample of size n under an arbitrary population history can be recapitulated by a piecewise-constant demography with only k(n) epochs, where k(n) is between n/2 and 2n-1. The set of expected SFS for piecewise-constant demographies with fewer than k(n) epochs is open and non-convex, which causes the above phenomena for inference from data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1712.05035v1-abstract-full').style.display = 'none'; document.getElementById('1712.05035v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 December, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2017. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">21 pages, 5 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 92D10; 14P10; 52A20; 62P10 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1510.06017">arXiv:1510.06017</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1510.06017">pdf</a>, <a href="https://arxiv.org/format/1510.06017">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Probability">math.PR</span> </div> </div> <p class="title is-5 mathjax"> Two-Locus Likelihoods under Variable Population Size and Fine-Scale Recombination Rate Estimation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Kamm%2C+J+A">John A. Kamm</a>, <a href="/search/q-bio?searchtype=author&amp;query=Spence%2C+J+P">Jeffrey P. Spence</a>, <a href="/search/q-bio?searchtype=author&amp;query=Chan%2C+J">Jeffrey Chan</a>, <a href="/search/q-bio?searchtype=author&amp;query=Song%2C+Y+S">Yun S. Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1510.06017v2-abstract-short" style="display: inline;"> Two-locus sampling probabilities have played a central role in devising an efficient composite likelihood method for estimating fine-scale recombination rates. 
Due to mathematical and computational challenges, these sampling probabilities are typically computed under the unrealistic assumption of a constant population size, and simulation studies have shown that resulting recombination rate estima&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1510.06017v2-abstract-full').style.display = 'inline'; document.getElementById('1510.06017v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1510.06017v2-abstract-full" style="display: none;"> Two-locus sampling probabilities have played a central role in devising an efficient composite likelihood method for estimating fine-scale recombination rates. Due to mathematical and computational challenges, these sampling probabilities are typically computed under the unrealistic assumption of a constant population size, and simulation studies have shown that resulting recombination rate estimates can be severely biased in certain cases of historical population size changes. To alleviate this problem, we develop here new methods to compute the sampling probability for variable population size functions that are piecewise constant. Our main theoretical result, implemented in a new software package called LDpop, is a novel formula for the sampling probability that can be evaluated by numerically exponentiating a large but sparse matrix. This formula can handle moderate sample sizes ($n \leq 50$) and demographic size histories with a large number of epochs ($\mathcal{D} \geq 64$). In addition, LDpop implements an approximate formula for the sampling probability that is reasonably accurate and scales to hundreds in sample size ($n \geq 256$). Finally, LDpop includes an importance sampler for the posterior distribution of two-locus genealogies, based on a new result for the optimal proposal distribution in the variable-size setting. 
Using our methods, we study how a sharp population bottleneck followed by rapid growth affects the correlation between partially linked sites. Then, through an extensive simulation study, we show that accounting for population size changes under such a demographic model leads to substantial improvements in fine-scale recombination rate estimation. LDpop is freely available for download at https://github.com/popgenmethods/ldpop <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1510.06017v2-abstract-full').style.display = 'none'; document.getElementById('1510.06017v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 April, 2016; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 October, 2015; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2015. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">32 pages, 13 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1510.05631">arXiv:1510.05631</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1510.05631">pdf</a>, <a href="https://arxiv.org/format/1510.05631">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Probability">math.PR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1534/genetics.115.184101">10.1534/genetics.115.184101 <i 
class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> The site frequency spectrum for general coalescents </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Spence%2C+J+P">Jeffrey P. Spence</a>, <a href="/search/q-bio?searchtype=author&amp;query=Kamm%2C+J+A">John A. Kamm</a>, <a href="/search/q-bio?searchtype=author&amp;query=Song%2C+Y+S">Yun S. Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1510.05631v2-abstract-short" style="display: inline;"> General genealogical processes such as $Λ$- and $Ξ$-coalescents, which respectively model multiple and simultaneous mergers, have important applications in studying marine species, strong positive selection, recurrent selective sweeps, strong bottlenecks, large sample sizes, and so on. Recently, there has been significant progress in developing useful inference tools for such general models. In pa&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1510.05631v2-abstract-full').style.display = 'inline'; document.getElementById('1510.05631v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1510.05631v2-abstract-full" style="display: none;"> General genealogical processes such as $Λ$- and $Ξ$-coalescents, which respectively model multiple and simultaneous mergers, have important applications in studying marine species, strong positive selection, recurrent selective sweeps, strong bottlenecks, large sample sizes, and so on. Recently, there has been significant progress in developing useful inference tools for such general models. In particular, inference methods based on the site frequency spectrum (SFS) have received noticeable attention. 
Here, we derive a new formula for the expected SFS for general $Λ$- and $Ξ$-coalescents, which leads to an efficient algorithm. For time-homogeneous coalescents, the runtime of our algorithm for computing the expected SFS is $O(n^2)$, where $n$ is the sample size. This is a factor of $n^2$ faster than the state-of-the-art method. Furthermore, in contrast to existing methods, our method generalizes to time-inhomogeneous $Λ$- and $Ξ$-coalescents with measures that factorize as $Λ(dx)/ζ(t)$ and $Ξ(dx)/ζ(t)$, respectively, where $ζ$ denotes a strictly positive function of time. The runtime of our algorithm in this setting is $O(n^3)$. We also obtain general theoretical results for the identifiability of the $Λ$ measure when $ζ$ is a constant function, as well as for the identifiability of the function $ζ$ under a fixed $Ξ$ measure. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1510.05631v2-abstract-full').style.display = 'none'; document.getElementById('1510.05631v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2016; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 October, 2015; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2015. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages, 4 figure</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Genetics, Vol. 202 No. 
4 (2016) 1549-1561 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1505.04228">arXiv:1505.04228</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1505.04228">pdf</a>, <a href="https://arxiv.org/format/1505.04228">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Statistics Theory">math.ST</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1073/pnas.1503717112">10.1073/pnas.1503717112 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Fundamental limits on the accuracy of demographic inference based on the sample frequency spectrum </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Terhorst%2C+J">Jonathan Terhorst</a>, <a href="/search/q-bio?searchtype=author&amp;query=Song%2C+Y+S">Yun S. Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1505.04228v1-abstract-short" style="display: inline;"> The sample frequency spectrum (SFS) of DNA sequences from a collection of individuals is a summary statistic which is commonly used for parametric inference in population genetics. Despite the popularity of SFS-based inference methods, currently little is known about the information-theoretic limit on the estimation accuracy as a function of sample size. 
Here, we show that using the SFS to estimat&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1505.04228v1-abstract-full').style.display = 'inline'; document.getElementById('1505.04228v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1505.04228v1-abstract-full" style="display: none;"> The sample frequency spectrum (SFS) of DNA sequences from a collection of individuals is a summary statistic which is commonly used for parametric inference in population genetics. Despite the popularity of SFS-based inference methods, currently little is known about the information-theoretic limit on the estimation accuracy as a function of sample size. Here, we show that using the SFS to estimate the size history of a population has a minimax error of at least $O(1/\log s)$, where $s$ is the number of independent segregating sites used in the analysis. This rate is exponentially worse than known convergence rates for many classical estimation problems in statistics. Another surprising aspect of our theoretical bound is that it does not depend on the dimension of the SFS, which is related to the number of sampled individuals. This means that, for a fixed number $s$ of segregating sites considered, using more individuals does not help to reduce the minimax error bound. Our result pertains to populations that have experienced a bottleneck, and we argue that it can be expected to apply to many populations in nature. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1505.04228v1-abstract-full').style.display = 'none'; document.getElementById('1505.04228v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 May, 2015; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2015. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 1 figure</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Proc. Natl. Acad. Sci. U.S.A., Vol. 112, No. 25 (2015) 7677-7682 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1503.01133">arXiv:1503.01133</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1503.01133">pdf</a>, <a href="https://arxiv.org/format/1503.01133">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Probability">math.PR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1080/10618600.2016.1159212">10.1080/10618600.2016.1159212 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Efficient computation of the joint sample frequency spectra for multiple populations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Kamm%2C+J+A">John A. Kamm</a>, <a href="/search/q-bio?searchtype=author&amp;query=Terhorst%2C+J">Jonathan Terhorst</a>, <a href="/search/q-bio?searchtype=author&amp;query=Song%2C+Y+S">Yun S. 
Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1503.01133v1-abstract-short" style="display: inline;"> A wide range of studies in population genetics have employed the sample frequency spectrum (SFS), a summary statistic which describes the distribution of mutant alleles at a polymorphic site in a sample of DNA sequences. In particular, recently there has been growing interest in analyzing the joint SFS data from multiple populations to infer parameters of complex demographic histories, including v&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1503.01133v1-abstract-full').style.display = 'inline'; document.getElementById('1503.01133v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1503.01133v1-abstract-full" style="display: none;"> A wide range of studies in population genetics have employed the sample frequency spectrum (SFS), a summary statistic which describes the distribution of mutant alleles at a polymorphic site in a sample of DNA sequences. In particular, recently there has been growing interest in analyzing the joint SFS data from multiple populations to infer parameters of complex demographic histories, including variable population sizes, population split times, migration rates, admixture proportions, and so on. Although much methodological progress has been made, existing SFS-based inference methods suffer from numerical instability and high computational complexity when multiple populations are involved and the sample size is large. In this paper, we present new analytic formulas and algorithms that enable efficient computation of the expected joint SFS for multiple populations related by a complex demographic model with arbitrary population size histories (including piecewise exponential growth). 
Our results are implemented in a new software package called momi (MOran Models for Inference). Through an empirical study involving tens of populations, we demonstrate our improvements to numerical stability and computational complexity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1503.01133v1-abstract-full').style.display = 'none'; document.getElementById('1503.01133v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 March, 2015; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2015. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">24 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1405.6863">arXiv:1405.6863</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1405.6863">pdf</a>, <a href="https://arxiv.org/ps/1405.6863">ps</a>, <a href="https://arxiv.org/format/1405.6863">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Probability">math.PR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1214/EJP.v20-3564">10.1214/EJP.v20-3564 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Tractable diffusion and coalescent processes for weakly correlated loci </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/q-bio?searchtype=author&amp;query=Jenkins%2C+P+A">Paul A. Jenkins</a>, <a href="/search/q-bio?searchtype=author&amp;query=Fearnhead%2C+P">Paul Fearnhead</a>, <a href="/search/q-bio?searchtype=author&amp;query=Song%2C+Y+S">Yun S. Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1405.6863v2-abstract-short" style="display: inline;"> Widely used models in genetics include the Wright-Fisher diffusion and its moment dual, Kingman&#39;s coalescent. Each has a multilocus extension but under neither extension is the sampling distribution available in closed-form, and their computation is extremely difficult. In this paper we derive two new multilocus population genetic models, one a diffusion and the other a coalescent process, which a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1405.6863v2-abstract-full').style.display = 'inline'; document.getElementById('1405.6863v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1405.6863v2-abstract-full" style="display: none;"> Widely used models in genetics include the Wright-Fisher diffusion and its moment dual, Kingman&#39;s coalescent. Each has a multilocus extension but under neither extension is the sampling distribution available in closed-form, and their computation is extremely difficult. In this paper we derive two new multilocus population genetic models, one a diffusion and the other a coalescent process, which are much simpler than the standard models, but which capture their key properties for large recombination rates. The diffusion model is based on a central limit theorem for density dependent population processes, and we show that the sampling distribution is a linear combination of moments of Gaussian distributions and hence available in closed-form. 
The coalescent process is based on a probabilistic coupling of the ancestral recombination graph to a simpler genealogical process which exposes the leading dynamics of the former. We further demonstrate that when we consider the sampling distribution as an asymptotic expansion in inverse powers of the recombination parameter, the sampling distributions of the new models agree with the standard ones up to the first two orders. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1405.6863v2-abstract-full').style.display = 'none'; document.getElementById('1405.6863v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 March, 2015; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 May, 2014; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2014. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">34 pages, 1 figure</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 92D15 (Primary) 65C50; 92D10 (Secondary) </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Electronic Journal of Probability, Vol. 
20 (2015) Article 58 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1403.0858">arXiv:1403.0858</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1403.0858">pdf</a>, <a href="https://arxiv.org/format/1403.0858">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> </div> </div> <p class="title is-5 mathjax"> Decoding coalescent hidden Markov models in linear time </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Harris%2C+K">Kelley Harris</a>, <a href="/search/q-bio?searchtype=author&amp;query=Sheehan%2C+S">Sara Sheehan</a>, <a href="/search/q-bio?searchtype=author&amp;query=Kamm%2C+J+A">John A. Kamm</a>, <a href="/search/q-bio?searchtype=author&amp;query=Song%2C+Y+S">Yun S. Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1403.0858v1-abstract-short" style="display: inline;"> In many areas of computational biology, hidden Markov models (HMMs) have been used to model local genomic features. In particular, coalescent HMMs have been used to infer ancient population sizes, migration rates, divergence times, and other parameters such as mutation and recombination rates. 
As more loci, sequences, and hidden states are added to the model, however, the runtime of coalescent HMM&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1403.0858v1-abstract-full').style.display = 'inline'; document.getElementById('1403.0858v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1403.0858v1-abstract-full" style="display: none;"> In many areas of computational biology, hidden Markov models (HMMs) have been used to model local genomic features. In particular, coalescent HMMs have been used to infer ancient population sizes, migration rates, divergence times, and other parameters such as mutation and recombination rates. As more loci, sequences, and hidden states are added to the model, however, the runtime of coalescent HMMs can quickly become prohibitive. Here we present a new algorithm for reducing the runtime of coalescent HMMs from quadratic in the number of hidden time states to linear, without making any additional approximations. Our algorithm can be incorporated into various coalescent HMMs, including the popular method PSMC for inferring variable effective population sizes. Here we implement this algorithm to speed up our demographic inference method diCal, which is equivalent to PSMC when applied to a sample of two haplotypes. We demonstrate that the linear-time method can reconstruct a population size change history more accurately than the quadratic-time method, given similar computation resources. We also apply the method to data from the 1000 Genomes project, inferring a high-resolution history of size changes in the European population. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1403.0858v1-abstract-full').style.display = 'none'; document.getElementById('1403.0858v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 March, 2014; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2014. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 5 figures. To appear in the Proceedings of the 18th Annual International Conference on Research in Computational Molecular Biology (RECOMB 2014). The final publication is available at link.springer.com</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1310.8420">arXiv:1310.8420</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1310.8420">pdf</a>, <a href="https://arxiv.org/format/1310.8420">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> SMaSH: A Benchmarking Toolkit for Human Genome Variant Calling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Talwalkar%2C+A">Ameet Talwalkar</a>, <a href="/search/q-bio?searchtype=author&amp;query=Liptrap%2C+J">Jesse Liptrap</a>, <a href="/search/q-bio?searchtype=author&amp;query=Newcomb%2C+J">Julie Newcomb</a>, <a href="/search/q-bio?searchtype=author&amp;query=Hartl%2C+C">Christopher Hartl</a>, <a href="/search/q-bio?searchtype=author&amp;query=Terhorst%2C+J">Jonathan Terhorst</a>, <a 
href="/search/q-bio?searchtype=author&amp;query=Curtis%2C+K">Kristal Curtis</a>, <a href="/search/q-bio?searchtype=author&amp;query=Bresler%2C+M">Ma&#39;ayan Bresler</a>, <a href="/search/q-bio?searchtype=author&amp;query=Song%2C+Y+S">Yun S. Song</a>, <a href="/search/q-bio?searchtype=author&amp;query=Jordan%2C+M+I">Michael I. Jordan</a>, <a href="/search/q-bio?searchtype=author&amp;query=Patterson%2C+D">David Patterson</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1310.8420v2-abstract-short" style="display: inline;"> Motivation: Computational methods are essential to extract actionable information from raw sequencing data, and to thus fulfill the promise of next-generation sequencing technology. Unfortunately, computational tools developed to call variants from human sequencing data disagree on many of their predictions, and current methods to evaluate accuracy and computational performance are ad-hoc and inco&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1310.8420v2-abstract-full').style.display = 'inline'; document.getElementById('1310.8420v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1310.8420v2-abstract-full" style="display: none;"> Motivation: Computational methods are essential to extract actionable information from raw sequencing data, and to thus fulfill the promise of next-generation sequencing technology. Unfortunately, computational tools developed to call variants from human sequencing data disagree on many of their predictions, and current methods to evaluate accuracy and computational performance are ad-hoc and incomplete. Agreement on benchmarking variant calling methods would stimulate development of genomic processing tools and facilitate communication among researchers. 
Results: We propose SMaSH, a benchmarking methodology for evaluating human genome variant calling algorithms. We generate synthetic datasets, organize and interpret a wide range of existing benchmarking data for real genomes, and propose a set of accuracy and computational performance metrics for evaluating variant calling methods on this benchmarking data. Moreover, we illustrate the utility of SMaSH to evaluate the performance of some leading single nucleotide polymorphism (SNP), indel, and structural variant calling algorithms. Availability: We provide free and open access online to the SMaSH toolkit, along with detailed documentation, at smash.cs.berkeley.edu. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1310.8420v2-abstract-full').style.display = 'none'; document.getElementById('1310.8420v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 January, 2014; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 October, 2013; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2013. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1310.3444">arXiv:1310.3444</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1310.3444">pdf</a>, <a href="https://arxiv.org/format/1310.3444">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1534/genetics.113.158584">10.1534/genetics.113.158584 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> General triallelic frequency spectrum under demographic models with variable population size </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Jenkins%2C+P+A">Paul A. Jenkins</a>, <a href="/search/q-bio?searchtype=author&amp;query=Mueller%2C+J+W">Jonas W. Mueller</a>, <a href="/search/q-bio?searchtype=author&amp;query=Song%2C+Y+S">Yun S. Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1310.3444v2-abstract-short" style="display: inline;"> It is becoming routine to obtain datasets on DNA sequence variation across several thousands of chromosomes, providing unprecedented opportunity to infer the underlying biological and demographic forces. Such data make it vital to study summary statistics which offer enough compression to be tractable, while preserving a great deal of information. 
One well-studied summary is the site frequency spe&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1310.3444v2-abstract-full').style.display = 'inline'; document.getElementById('1310.3444v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1310.3444v2-abstract-full" style="display: none;"> It is becoming routine to obtain datasets on DNA sequence variation across several thousands of chromosomes, providing unprecedented opportunity to infer the underlying biological and demographic forces. Such data make it vital to study summary statistics which offer enough compression to be tractable, while preserving a great deal of information. One well-studied summary is the site frequency spectrum---the empirical distribution, across segregating sites, of the sample frequency of the derived allele. However, most previous theoretical work has assumed that each site has experienced at most one mutation event in its genealogical history, which becomes less tenable for very large sample sizes. In this work we obtain, in closed-form, the predicted frequency spectrum of a site that has experienced at most two mutation events, under very general assumptions about the distribution of branch lengths in the underlying coalescent tree. Among other applications, we obtain the frequency spectrum of a triallelic site in a model of historically varying population size. We demonstrate the utility of our formulas in two settings: First, we show that triallelic sites are more sensitive to the parameters of a population that has experienced historical growth, suggesting that they will have use if they can be incorporated into demographic inference. 
Second, we investigate a recently proposed alternative mechanism of mutation in which the two derived alleles of a triallelic site are created simultaneously within a single individual, and we develop a test to determine whether it is responsible for the excess of triallelic sites in the human genome. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1310.3444v2-abstract-full').style.display = 'none'; document.getElementById('1310.3444v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 November, 2013; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 October, 2013; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2013. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">29 pages, 11 figures (main text) + 6 pages, 3 figures (Supporting Information)</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Genetics, Vol. 196, No. 
1 (2014) 295-311 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1310.1068">arXiv:1310.1068</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1310.1068">pdf</a>, <a href="https://arxiv.org/ps/1310.1068">ps</a>, <a href="https://arxiv.org/format/1310.1068">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Functional Analysis">math.FA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1214/14-AOAS764">10.1214/14-AOAS764 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> A novel spectral method for inferring general diploid selection from time series genetic data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Steinr%C3%BCcken%2C+M">Matthias Steinrücken</a>, <a href="/search/q-bio?searchtype=author&amp;query=Bhaskar%2C+A">Anand Bhaskar</a>, <a href="/search/q-bio?searchtype=author&amp;query=Song%2C+Y+S">Yun S. 
Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1310.1068v2-abstract-short" style="display: inline;"> The increased availability of time series genetic variation data from experimental evolution studies and ancient DNA samples has created new opportunities to identify genomic regions under selective pressure and to estimate their associated fitness parameters. However, it is a challenging problem to compute the likelihood of nonneutral models for the population allele frequency dynamics, given the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1310.1068v2-abstract-full').style.display = 'inline'; document.getElementById('1310.1068v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1310.1068v2-abstract-full" style="display: none;"> The increased availability of time series genetic variation data from experimental evolution studies and ancient DNA samples has created new opportunities to identify genomic regions under selective pressure and to estimate their associated fitness parameters. However, it is a challenging problem to compute the likelihood of nonneutral models for the population allele frequency dynamics, given the observed temporal DNA data. Here, we develop a novel spectral algorithm to analytically and efficiently integrate over all possible frequency trajectories between consecutive time points. This advance circumvents the limitations of existing methods which require fine-tuning the discretization of the population allele frequency space when numerically approximating requisite integrals. 
Furthermore, our method is flexible enough to handle general diploid models of selection where the heterozygote and homozygote fitness parameters can take any values, while previous methods focused on only a few restricted models of selection. We demonstrate the utility of our method on simulated data and also apply it to analyze ancient DNA data from genetic loci associated with coat coloration in horses. In contrast to previous studies, our exploration of the full fitness parameter space reveals that a heterozygote advantage form of balancing selection may have been acting on these loci. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1310.1068v2-abstract-full').style.display = 'none'; document.getElementById('1310.1068v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2015; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 October, 2013; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2013. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in at http://dx.doi.org/10.1214/14-AOAS764 the Annals of Applied Statistics (http://www.imstat.org/aoas/) by the Institute of Mathematical Statistics (http://www.imstat.org)</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Report number:</span> IMS-AOAS-AOAS764 </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Annals of Applied Statistics 2014, Vol. 8, No. 
4, 2203-2222 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1309.5056">arXiv:1309.5056</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1309.5056">pdf</a>, <a href="https://arxiv.org/ps/1309.5056">ps</a>, <a href="https://arxiv.org/format/1309.5056">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Statistics Theory">math.ST</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1214/14-AOS1264">10.1214/14-AOS1264 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Descartes&#39; rule of signs and the identifiability of population demographic models from genomic variation data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Bhaskar%2C+A">Anand Bhaskar</a>, <a href="/search/q-bio?searchtype=author&amp;query=Song%2C+Y+S">Yun S. Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1309.5056v3-abstract-short" style="display: inline;"> The sample frequency spectrum (SFS) is a widely-used summary statistic of genomic variation in a sample of homologous DNA sequences. 
It provides a highly efficient dimensional reduction of large-scale population genomic data and its mathematical dependence on the underlying population demography is well understood, thus enabling the development of efficient inference algorithms. However, it has be&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1309.5056v3-abstract-full').style.display = 'inline'; document.getElementById('1309.5056v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1309.5056v3-abstract-full" style="display: none;"> The sample frequency spectrum (SFS) is a widely-used summary statistic of genomic variation in a sample of homologous DNA sequences. It provides a highly efficient dimensional reduction of large-scale population genomic data and its mathematical dependence on the underlying population demography is well understood, thus enabling the development of efficient inference algorithms. However, it has been recently shown that very different population demographies can actually generate the same SFS for arbitrarily large sample sizes. Although in principle this nonidentifiability issue poses a thorny challenge to statistical inference, the population size functions involved in the counterexamples are arguably not so biologically realistic. Here, we revisit this problem and examine the identifiability of demographic models under the restriction that the population sizes are piecewise-defined where each piece belongs to some family of biologically-motivated functions. Under this assumption, we prove that the expected SFS of a sample uniquely determines the underlying demographic model, provided that the sample is sufficiently large. We obtain a general bound on the sample size sufficient for identifiability; the bound depends on the number of pieces in the demographic model and also on the type of population size function in each piece. 
In the cases of piecewise-constant, piecewise-exponential and piecewise-generalized-exponential models, which are often assumed in population genomic inferences, we provide explicit formulas for the bounds as simple functions of the number of pieces. Lastly, we obtain analogous results for the &#34;folded&#34; SFS, which is often used when there is ambiguity as to which allelic type is ancestral. Our results are proved using a generalization of Descartes&#39; rule of signs for polynomials to the Laplace transform of piecewise continuous functions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1309.5056v3-abstract-full').style.display = 'none'; document.getElementById('1309.5056v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 December, 2014; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 September, 2013; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2013. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in at http://dx.doi.org/10.1214/14-AOS1264 the Annals of Statistics (http://www.imstat.org/aos/) by the Institute of Mathematical Statistics (http://www.imstat.org)</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Report number:</span> IMS-AOS-AOS1264 </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Annals of Statistics 2014, Vol. 42, No. 
6, 2469-2493 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1308.0091">arXiv:1308.0091</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1308.0091">pdf</a>, <a href="https://arxiv.org/format/1308.0091">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1073/pnas.1322709111">10.1073/pnas.1322709111 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Distortion of genealogical properties when the sample is very large </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Bhaskar%2C+A">Anand Bhaskar</a>, <a href="/search/q-bio?searchtype=author&amp;query=Clark%2C+A+G">Andrew G. Clark</a>, <a href="/search/q-bio?searchtype=author&amp;query=Song%2C+Y+S">Yun S. Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1308.0091v1-abstract-short" style="display: inline;"> Study sample sizes in human genetics are growing rapidly, and in due course it will become routine to analyze samples with hundreds of thousands if not millions of individuals. In addition to posing computational challenges, such large sample sizes call for carefully re-examining the theoretical foundation underlying commonly-used analytical tools. 
Here, we study the accuracy of the coalescent, a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1308.0091v1-abstract-full').style.display = 'inline'; document.getElementById('1308.0091v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1308.0091v1-abstract-full" style="display: none;"> Study sample sizes in human genetics are growing rapidly, and in due course it will become routine to analyze samples with hundreds of thousands if not millions of individuals. In addition to posing computational challenges, such large sample sizes call for carefully re-examining the theoretical foundation underlying commonly-used analytical tools. Here, we study the accuracy of the coalescent, a central model for studying the ancestry of a sample of individuals. The coalescent arises as a limit of a large class of random mating models and it is an accurate approximation to the original model provided that the population size is sufficiently larger than the sample size. We develop a method for performing exact computation in the discrete-time Wright-Fisher (DTWF) model and compare several key genealogical quantities of interest with the coalescent predictions. For realistic demographic scenarios, we find that there are a significant number of multiple- and simultaneous-merger events under the DTWF model, which are absent in the coalescent by construction. Furthermore, for large sample sizes, there are noticeable differences in the expected number of rare variants between the coalescent and the DTWF model. To balance the tradeoff between accuracy and computational efficiency, we propose a hybrid algorithm that utilizes the DTWF model for the recent past and the coalescent for the more distant past. 
Our results demonstrate that the hybrid method with only a handful of generations of the DTWF model leads to a frequency spectrum that is quite close to the prediction of the full DTWF model. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1308.0091v1-abstract-full').style.display = 'none'; document.getElementById('1308.0091v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 August, 2013; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2013. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, 2 tables, 14 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Proc. Natl. Acad. Sci. U.S.A., Vol. 111, No. 6 (2014) 2385-2390 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1304.3160">arXiv:1304.3160</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1304.3160">pdf</a>, <a href="https://arxiv.org/ps/1304.3160">ps</a>, <a href="https://arxiv.org/format/1304.3160">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1371/journal.pone.0070495">10.1371/journal.pone.0070495 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> The 
influence of relatives on the efficiency and error rate of familial searching </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Rohlfs%2C+R+V">Rori V. Rohlfs</a>, <a href="/search/q-bio?searchtype=author&amp;query=Murphy%2C+E">Erin Murphy</a>, <a href="/search/q-bio?searchtype=author&amp;query=Song%2C+Y+S">Yun S. Song</a>, <a href="/search/q-bio?searchtype=author&amp;query=Slatkin%2C+M">Montgomery Slatkin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1304.3160v2-abstract-short" style="display: inline;"> We investigate the consequences of adopting the criteria used by the state of California, as described by Myers et al. (2011), for conducting familial searches. We carried out a simulation study of randomly generated profiles of related and unrelated individuals with 13-locus CODIS genotypes and YFiler Y-chromosome haplotypes, on which the Myers protocol for relative identification was carried out&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1304.3160v2-abstract-full').style.display = 'inline'; document.getElementById('1304.3160v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1304.3160v2-abstract-full" style="display: none;"> We investigate the consequences of adopting the criteria used by the state of California, as described by Myers et al. (2011), for conducting familial searches. We carried out a simulation study of randomly generated profiles of related and unrelated individuals with 13-locus CODIS genotypes and YFiler Y-chromosome haplotypes, on which the Myers protocol for relative identification was carried out. 
For Y-chromosome sharing first degree relatives, the Myers protocol has a high probability (80 - 99%) of identifying their relationship. For unrelated individuals, there is a low probability that an unrelated person in the database will be identified as a first-degree relative. For more distant Y-haplotype sharing relatives (half-siblings, first cousins, half-first cousins or second cousins) there is a substantial probability that the more distant relative will be incorrectly identified as a first-degree relative. For example, there is a 3 - 18% probability that a first cousin will be identified as a full sibling, with the probability depending on the population background. Although the California familial search policy is likely to identify a first degree relative if his profile is in the database, and it poses little risk of falsely identifying an unrelated individual in a database as a first-degree relative, there is a substantial risk of falsely identifying a more distant Y-haplotype sharing relative in the database as a first-degree relative, with the consequence that their immediate family may become the target for further investigation. This risk falls disproportionately on those ethnic groups that are currently overrepresented in state and federal databases. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1304.3160v2-abstract-full').style.display = 'none'; document.getElementById('1304.3160v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 August, 2013; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 April, 2013; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2013. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">main text: 19 pages, 4 tables, 2 figures supplemental text: 2 pages, 5 tables all together as single file</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> PLoS ONE 8(8): e70495 (2013) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1208.5087">arXiv:1208.5087</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1208.5087">pdf</a>, <a href="https://arxiv.org/ps/1208.5087">ps</a>, <a href="https://arxiv.org/format/1208.5087">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Probability">math.PR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Analysis of PDEs">math.AP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.tpb.2012.10.006">10.1016/j.tpb.2012.10.006 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> An explicit transition density expansion for a multi-allelic Wright-Fisher diffusion with general diploid selection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Steinr%C3%BCcken%2C+M">Matthias Steinrücken</a>, <a href="/search/q-bio?searchtype=author&amp;query=Wang%2C+Y+X+R">Y. X. Rachel Wang</a>, <a href="/search/q-bio?searchtype=author&amp;query=Song%2C+Y+S">Yun S. 
Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1208.5087v2-abstract-short" style="display: inline;"> Characterizing time-evolution of allele frequencies in a population is a fundamental problem in population genetics. In the Wright-Fisher diffusion, such dynamics is captured by the transition density function, which satisfies well-known partial differential equations. For a multi-allelic model with general diploid selection, various theoretical results exist on representations of the transition d&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1208.5087v2-abstract-full').style.display = 'inline'; document.getElementById('1208.5087v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1208.5087v2-abstract-full" style="display: none;"> Characterizing time-evolution of allele frequencies in a population is a fundamental problem in population genetics. In the Wright-Fisher diffusion, such dynamics is captured by the transition density function, which satisfies well-known partial differential equations. For a multi-allelic model with general diploid selection, various theoretical results exist on representations of the transition density, but finding an explicit formula has remained a difficult problem. In this paper, a technique recently developed for a diallelic model is extended to find an explicit transition density for an arbitrary number of alleles, under a general diploid selection model with recurrent parent-independent mutation. Specifically, the method finds the eigenvalues and eigenfunctions of the generator associated with the multi-allelic diffusion, thus yielding an accurate spectral representation of the transition density. 
Furthermore, this approach allows for efficient, accurate computation of various other quantities of interest, including the normalizing constant of the stationary distribution and the rate of convergence to this distribution. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1208.5087v2-abstract-full').style.display = 'none'; document.getElementById('1208.5087v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2012; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 August, 2012; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2012. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">28 pages, 12 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Theoretical Population Biology 83 (2013) 1-14 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1208.5086">arXiv:1208.5086</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1208.5086">pdf</a>, <a href="https://arxiv.org/ps/1208.5086">ps</a>, <a href="https://arxiv.org/format/1208.5086">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Probability">math.PR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.tpb.2012.08.004">10.1016/j.tpb.2012.08.004 <i 
class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> A sequentially Markov conditional sampling distribution for structured populations with migration and recombination </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Steinr%C3%BCcken%2C+M">Matthias Steinrücken</a>, <a href="/search/q-bio?searchtype=author&amp;query=Paul%2C+J+S">Joshua S. Paul</a>, <a href="/search/q-bio?searchtype=author&amp;query=Song%2C+Y+S">Yun S. Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1208.5086v2-abstract-short" style="display: inline;"> Conditional sampling distributions (CSDs), sometimes referred to as copying models, underlie numerous practical tools in population genomic analyses. Though an important application that has received much attention is the inference of population structure, the explicit exchange of migrants at specified rates has not hitherto been incorporated into the CSD in a principled framework. Recently, in th&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1208.5086v2-abstract-full').style.display = 'inline'; document.getElementById('1208.5086v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1208.5086v2-abstract-full" style="display: none;"> Conditional sampling distributions (CSDs), sometimes referred to as copying models, underlie numerous practical tools in population genomic analyses. Though an important application that has received much attention is the inference of population structure, the explicit exchange of migrants at specified rates has not hitherto been incorporated into the CSD in a principled framework. 
Recently, in the case of a single panmictic population, a sequentially Markov CSD has been developed as an accurate, efficient approximation to a principled CSD derived from the diffusion process dual to the coalescent with recombination. In this paper, the sequentially Markov CSD framework is extended to incorporate subdivided population structure, thus providing an efficiently computable CSD that admits a genealogical interpretation related to the structured coalescent with migration and recombination. As a concrete application, it is demonstrated empirically that the CSD developed here can be employed to yield accurate estimation of a wide range of migration rates. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1208.5086v2-abstract-full').style.display = 'none'; document.getElementById('1208.5086v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2012; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 August, 2012; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2012. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">25 pages, 10 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Theoretical Population Biology 87 (2013) 51-61 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1109.2386">arXiv:1109.2386</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1109.2386">pdf</a>, <a href="https://arxiv.org/format/1109.2386">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Probability">math.PR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1239/aap/1339878718">10.1239/aap/1339878718 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Approximate sampling formulae for general finite-alleles models of mutation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Bhaskar%2C+A">Anand Bhaskar</a>, <a href="/search/q-bio?searchtype=author&amp;query=Kamm%2C+J+A">John A. Kamm</a>, <a href="/search/q-bio?searchtype=author&amp;query=Song%2C+Y+S">Yun S. 
Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1109.2386v3-abstract-short" style="display: inline;"> Many applications in genetic analyses utilize sampling distributions, which describe the probability of observing a sample of DNA sequences randomly drawn from a population. In the one-locus case with special models of mutation such as the infinite-alleles model or the finite-alleles parent-independent mutation model, closed-form sampling distributions under the coalescent have been known for many&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1109.2386v3-abstract-full').style.display = 'inline'; document.getElementById('1109.2386v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1109.2386v3-abstract-full" style="display: none;"> Many applications in genetic analyses utilize sampling distributions, which describe the probability of observing a sample of DNA sequences randomly drawn from a population. In the one-locus case with special models of mutation such as the infinite-alleles model or the finite-alleles parent-independent mutation model, closed-form sampling distributions under the coalescent have been known for many decades. However, no exact formula is currently known for more general models of mutation that are of biological interest. In this paper, models with finitely-many alleles are considered, and an urn construction related to the coalescent is used to derive approximate closed-form sampling formulas for an arbitrary irreducible recurrent mutation model or for a reversible recurrent mutation model, depending on whether the number of distinct observed allele types is at most three or four, respectively. 
It is demonstrated empirically that the formulas derived here are highly accurate when the per-base mutation rate is low, which holds for many biological organisms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1109.2386v3-abstract-full').style.display = 'none'; document.getElementById('1109.2386v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 July, 2012; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 September, 2011; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2011. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">22 pages, 1 figure</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Advances in Applied Probability, Vol. 44, No. 
2 (2012), 408-428 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1107.4700">arXiv:1107.4700</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1107.4700">pdf</a>, <a href="https://arxiv.org/format/1107.4700">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Probability">math.PR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1239/aap/1339878717">10.1239/aap/1339878717 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Closed-form asymptotic sampling distributions under the coalescent with recombination for an arbitrary number of loci </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Bhaskar%2C+A">Anand Bhaskar</a>, <a href="/search/q-bio?searchtype=author&amp;query=Song%2C+Y+S">Yun S. Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1107.4700v2-abstract-short" style="display: inline;"> Obtaining a closed-form sampling distribution for the coalescent with recombination is a challenging problem. In the case of two loci, a new framework based on asymptotic series has recently been developed to derive closed-form results when the recombination rate is moderate to large. 
In this paper, an arbitrary number of loci is considered and combinatorial approaches are employed to find closed-&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1107.4700v2-abstract-full').style.display = 'inline'; document.getElementById('1107.4700v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1107.4700v2-abstract-full" style="display: none;"> Obtaining a closed-form sampling distribution for the coalescent with recombination is a challenging problem. In the case of two loci, a new framework based on asymptotic series has recently been developed to derive closed-form results when the recombination rate is moderate to large. In this paper, an arbitrary number of loci is considered and combinatorial approaches are employed to find closed-form expressions for the first couple of terms in an asymptotic expansion of the multi-locus sampling distribution. These expressions are universal in the sense that their functional form in terms of the marginal one-locus distributions applies to all finite- and infinite-alleles models of mutation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1107.4700v2-abstract-full').style.display = 'none'; document.getElementById('1107.4700v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 October, 2011; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 July, 2011; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2011. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 2 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Advances in Applied Probability, Vol. 44, No. 2 (2012), 391-407 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1107.3897">arXiv:1107.3897</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1107.3897">pdf</a>, <a href="https://arxiv.org/ps/1107.3897">ps</a>, <a href="https://arxiv.org/format/1107.3897">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Probability">math.PR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1214/11-AAP780">10.1214/11-AAP780 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Padé approximants and exact two-locus sampling distributions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/q-bio?searchtype=author&amp;query=Jenkins%2C+P+A">Paul A. Jenkins</a>, <a href="/search/q-bio?searchtype=author&amp;query=Song%2C+Y+S">Yun S. 
Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1107.3897v2-abstract-short" style="display: inline;"> For population genetics models with recombination, obtaining an exact, analytic sampling distribution has remained a challenging open problem for several decades. Recently, a new perspective based on asymptotic series has been introduced to make progress on this problem. Specifically, closed-form expressions have been derived for the first few terms in an asymptotic expansion of the two-locus samp&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1107.3897v2-abstract-full').style.display = 'inline'; document.getElementById('1107.3897v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1107.3897v2-abstract-full" style="display: none;"> For population genetics models with recombination, obtaining an exact, analytic sampling distribution has remained a challenging open problem for several decades. Recently, a new perspective based on asymptotic series has been introduced to make progress on this problem. Specifically, closed-form expressions have been derived for the first few terms in an asymptotic expansion of the two-locus sampling distribution when the recombination rate $ρ$ is moderate to large. In this paper, a new computational technique is developed for finding the asymptotic expansion to an arbitrary order. Computation in this new approach can be automated easily. Furthermore, it is proved here that only a finite number of terms in the asymptotic expansion is needed to recover (via the method of Padé approximants) the exact two-locus sampling distribution as an analytic function of $ρ$; this function is exact for all values of $ρ\in[0,\infty)$. 
It is also shown that the new computational framework presented here is flexible enough to incorporate natural selection. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1107.3897v2-abstract-full').style.display = 'none'; document.getElementById('1107.3897v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 May, 2012; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 July, 2011; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2011. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in at http://dx.doi.org/10.1214/11-AAP780 the Annals of Applied Probability (http://www.imstat.org/aap/) by the Institute of Mathematical Statistics (http://www.imstat.org)</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Report number:</span> IMS-AAP-AAP780 </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Annals of Applied Probability 2012, Vol. 22, No. 
2, 576-607 </p> </li> </ol>
<!-- Release note shown via the "is-hidden-tablet" class (mobile-only feedback link). -->
<div class="is-hidden-tablet">
  <!-- feedback for mobile only -->
  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span>
</div>
</div>
</main>

<!--
  Page footer: secondary navigation laid out as two "MetaColumn" groups,
  each holding two columns of links.
  Links that open a new browsing context (target="_blank") carry
  rel="noopener" so the opened page gets no window.opener handle back to
  this document.
-->
<footer>
  <div class="columns is-desktop" role="navigation" aria-label="Secondary">
    <!-- MetaColumn 1 -->
    <div class="column">
      <div class="columns">
        <!-- About / Help -->
        <div class="column">
          <ul class="nav-spaced">
            <li><a href="https://info.arxiv.org/about">About</a></li>
            <li><a href="https://info.arxiv.org/help">Help</a></li>
          </ul>
        </div>
        <!-- Contact / Subscribe, each preceded by an inline SVG icon -->
        <div class="column">
          <ul class="nav-spaced">
            <li>
              <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>
              <a href="https://info.arxiv.org/help/contact.html"> Contact</a>
            </li>
            <li>
              <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg>
              <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a>
            </li>
          </ul>
        </div>
      </div>
    </div>
    <!-- end MetaColumn 1 -->
    <!-- MetaColumn 2 -->
    <div class="column">
      <div class="columns">
        <!-- Copyright / Privacy Policy -->
        <div class="column">
          <ul class="nav-spaced">
            <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li>
            <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li>
          </ul>
        </div>
        <!-- Accessibility help and operational-status links -->
        <div class="column sorry-app-links">
          <ul class="nav-spaced">
            <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li>
            <li>
              <!-- Inline content below is whitespace-sensitive: icons sit directly against their link text. -->
              <p class="help">
                <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank" rel="noopener">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br>
                Get status notifications via
                <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank" rel="noopener"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a>
                or
                <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank" rel="noopener"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a>
              </p>
            </li>
          </ul>
        </div>
      </div>
    </div>
    <!-- end MetaColumn 2 -->
  </div>
</footer>
<!-- Loaded at the end of <body>, after all page content has been parsed. -->
<script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script>
</body>
</html>

Pages: 1 2 3 4 5 6 7 8 9 10