CINXE.COM

Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;14 of 14 results for author: <span class="mathjax">Lan, C L</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Lan%2C+C+L">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Lan, C L"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Lan%2C+C+L&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Lan, C L"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.00118">arXiv:2408.00118</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.00118">pdf</a>, <a href="https://arxiv.org/format/2408.00118">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Gemma 2: Improving Open Language Models at a Practical Size </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gemma+Team"> Gemma Team</a>, <a href="/search/cs?searchtype=author&amp;query=Riviere%2C+M">Morgane Riviere</a>, <a href="/search/cs?searchtype=author&amp;query=Pathak%2C+S">Shreya Pathak</a>, <a href="/search/cs?searchtype=author&amp;query=Sessa%2C+P+G">Pier Giuseppe Sessa</a>, <a href="/search/cs?searchtype=author&amp;query=Hardin%2C+C">Cassidy Hardin</a>, <a href="/search/cs?searchtype=author&amp;query=Bhupatiraju%2C+S">Surya Bhupatiraju</a>, <a href="/search/cs?searchtype=author&amp;query=Hussenot%2C+L">L茅onard Hussenot</a>, <a href="/search/cs?searchtype=author&amp;query=Mesnard%2C+T">Thomas Mesnard</a>, <a href="/search/cs?searchtype=author&amp;query=Shahriari%2C+B">Bobak Shahriari</a>, <a href="/search/cs?searchtype=author&amp;query=Ram%C3%A9%2C+A">Alexandre Ram茅</a>, <a href="/search/cs?searchtype=author&amp;query=Ferret%2C+J">Johan Ferret</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+P">Peter Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Tafti%2C+P">Pouya Tafti</a>, <a href="/search/cs?searchtype=author&amp;query=Friesen%2C+A">Abe Friesen</a>, <a href="/search/cs?searchtype=author&amp;query=Casbon%2C+M">Michelle Casbon</a>, <a href="/search/cs?searchtype=author&amp;query=Ramos%2C+S">Sabela Ramos</a>, <a href="/search/cs?searchtype=author&amp;query=Kumar%2C+R">Ravin Kumar</a>, <a href="/search/cs?searchtype=author&amp;query=Lan%2C+C+L">Charline Le Lan</a>, <a href="/search/cs?searchtype=author&amp;query=Jerome%2C+S">Sammy Jerome</a>, <a href="/search/cs?searchtype=author&amp;query=Tsitsulin%2C+A">Anton Tsitsulin</a>, <a href="/search/cs?searchtype=author&amp;query=Vieillard%2C+N">Nino Vieillard</a>, <a href="/search/cs?searchtype=author&amp;query=Stanczyk%2C+P">Piotr Stanczyk</a>, <a href="/search/cs?searchtype=author&amp;query=Girgin%2C+S">Sertan Girgin</a>, <a href="/search/cs?searchtype=author&amp;query=Momchev%2C+N">Nikola Momchev</a>, <a href="/search/cs?searchtype=author&amp;query=Hoffman%2C+M">Matt Hoffman</a> , et al. (173 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.00118v3-abstract-short" style="display: inline;"> In this work, we introduce Gemma 2, a new addition to the Gemma family of lightweight, state-of-the-art open models, ranging in scale from 2 billion to 27 billion parameters. In this new version, we apply several known technical modifications to the Transformer architecture, such as interleaving local-global attentions (Beltagy et al., 2020a) and group-query attention (Ainslie et al., 2023). We al&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00118v3-abstract-full').style.display = 'inline'; document.getElementById('2408.00118v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.00118v3-abstract-full" style="display: none;"> In this work, we introduce Gemma 2, a new addition to the Gemma family of lightweight, state-of-the-art open models, ranging in scale from 2 billion to 27 billion parameters. In this new version, we apply several known technical modifications to the Transformer architecture, such as interleaving local-global attentions (Beltagy et al., 2020a) and group-query attention (Ainslie et al., 2023). We also train the 2B and 9B models with knowledge distillation (Hinton et al., 2015) instead of next token prediction. The resulting models deliver the best performance for their size, and even offer competitive alternatives to models that are 2-3 times bigger. We release all our models to the community. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00118v3-abstract-full').style.display = 'none'; document.getElementById('2408.00118v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.08635">arXiv:2403.08635</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.08635">pdf</a>, <a href="https://arxiv.org/format/2403.08635">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Human Alignment of Large Language Models through Online Preference Optimisation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Calandriello%2C+D">Daniele Calandriello</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+D">Daniel Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Munos%2C+R">Remi Munos</a>, <a href="/search/cs?searchtype=author&amp;query=Rowland%2C+M">Mark Rowland</a>, <a href="/search/cs?searchtype=author&amp;query=Tang%2C+Y">Yunhao Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Pires%2C+B+A">Bernardo Avila Pires</a>, <a href="/search/cs?searchtype=author&amp;query=Richemond%2C+P+H">Pierre Harvey Richemond</a>, <a href="/search/cs?searchtype=author&amp;query=Lan%2C+C+L">Charline Le Lan</a>, <a href="/search/cs?searchtype=author&amp;query=Valko%2C+M">Michal Valko</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+T">Tianqi Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Joshi%2C+R">Rishabh Joshi</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+Z">Zeyu Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Piot%2C+B">Bilal Piot</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.08635v1-abstract-short" style="display: inline;"> Ensuring alignment of language models&#39; outputs with human preferences is critical to guarantee a useful, safe, and pleasant user experience. Thus, human alignment has been extensively studied recently and several methods such as Reinforcement Learning from Human Feedback (RLHF), Direct Policy Optimisation (DPO) and Sequence Likelihood Calibration (SLiC) have emerged. In this paper, our contributio&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.08635v1-abstract-full').style.display = 'inline'; document.getElementById('2403.08635v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.08635v1-abstract-full" style="display: none;"> Ensuring alignment of language models&#39; outputs with human preferences is critical to guarantee a useful, safe, and pleasant user experience. Thus, human alignment has been extensively studied recently and several methods such as Reinforcement Learning from Human Feedback (RLHF), Direct Policy Optimisation (DPO) and Sequence Likelihood Calibration (SLiC) have emerged. In this paper, our contribution is two-fold. First, we show the equivalence between two recent alignment methods, namely Identity Policy Optimisation (IPO) and Nash Mirror Descent (Nash-MD). Second, we introduce a generalisation of IPO, named IPO-MD, that leverages the regularised sampling approach proposed by Nash-MD. This equivalence may seem surprising at first sight, since IPO is an offline method whereas Nash-MD is an online method using a preference model. However, this equivalence can be proven when we consider the online version of IPO, that is when both generations are sampled by the online policy and annotated by a trained preference model. Optimising the IPO loss with such a stream of data becomes then equivalent to finding the Nash equilibrium of the preference model through self-play. Building on this equivalence, we introduce the IPO-MD algorithm that generates data with a mixture policy (between the online and reference policy) similarly as the general Nash-MD algorithm. We compare online-IPO and IPO-MD to different online versions of existing losses on preference data such as DPO and SLiC on a summarisation task. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.08635v1-abstract-full').style.display = 'none'; document.getElementById('2403.08635v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.08295">arXiv:2403.08295</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.08295">pdf</a>, <a href="https://arxiv.org/format/2403.08295">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Gemma: Open Models Based on Gemini Research and Technology </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gemma+Team"> Gemma Team</a>, <a href="/search/cs?searchtype=author&amp;query=Mesnard%2C+T">Thomas Mesnard</a>, <a href="/search/cs?searchtype=author&amp;query=Hardin%2C+C">Cassidy Hardin</a>, <a href="/search/cs?searchtype=author&amp;query=Dadashi%2C+R">Robert Dadashi</a>, <a href="/search/cs?searchtype=author&amp;query=Bhupatiraju%2C+S">Surya Bhupatiraju</a>, <a href="/search/cs?searchtype=author&amp;query=Pathak%2C+S">Shreya Pathak</a>, <a href="/search/cs?searchtype=author&amp;query=Sifre%2C+L">Laurent Sifre</a>, <a href="/search/cs?searchtype=author&amp;query=Rivi%C3%A8re%2C+M">Morgane Rivi猫re</a>, <a href="/search/cs?searchtype=author&amp;query=Kale%2C+M+S">Mihir Sanjay Kale</a>, <a href="/search/cs?searchtype=author&amp;query=Love%2C+J">Juliette Love</a>, <a href="/search/cs?searchtype=author&amp;query=Tafti%2C+P">Pouya Tafti</a>, <a href="/search/cs?searchtype=author&amp;query=Hussenot%2C+L">L茅onard Hussenot</a>, <a href="/search/cs?searchtype=author&amp;query=Sessa%2C+P+G">Pier Giuseppe Sessa</a>, <a href="/search/cs?searchtype=author&amp;query=Chowdhery%2C+A">Aakanksha Chowdhery</a>, <a href="/search/cs?searchtype=author&amp;query=Roberts%2C+A">Adam Roberts</a>, <a href="/search/cs?searchtype=author&amp;query=Barua%2C+A">Aditya Barua</a>, <a href="/search/cs?searchtype=author&amp;query=Botev%2C+A">Alex Botev</a>, <a href="/search/cs?searchtype=author&amp;query=Castro-Ros%2C+A">Alex Castro-Ros</a>, <a href="/search/cs?searchtype=author&amp;query=Slone%2C+A">Ambrose Slone</a>, <a href="/search/cs?searchtype=author&amp;query=H%C3%A9liou%2C+A">Am茅lie H茅liou</a>, <a href="/search/cs?searchtype=author&amp;query=Tacchetti%2C+A">Andrea Tacchetti</a>, <a href="/search/cs?searchtype=author&amp;query=Bulanova%2C+A">Anna Bulanova</a>, <a href="/search/cs?searchtype=author&amp;query=Paterson%2C+A">Antonia Paterson</a>, <a href="/search/cs?searchtype=author&amp;query=Tsai%2C+B">Beth Tsai</a>, <a href="/search/cs?searchtype=author&amp;query=Shahriari%2C+B">Bobak Shahriari</a> , et al. (83 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.08295v4-abstract-short" style="display: inline;"> This work introduces Gemma, a family of lightweight, state-of-the art open models built from the research and technology used to create Gemini models. Gemma models demonstrate strong performance across academic benchmarks for language understanding, reasoning, and safety. We release two sizes of models (2 billion and 7 billion parameters), and provide both pretrained and fine-tuned checkpoints. Ge&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.08295v4-abstract-full').style.display = 'inline'; document.getElementById('2403.08295v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.08295v4-abstract-full" style="display: none;"> This work introduces Gemma, a family of lightweight, state-of-the art open models built from the research and technology used to create Gemini models. Gemma models demonstrate strong performance across academic benchmarks for language understanding, reasoning, and safety. We release two sizes of models (2 billion and 7 billion parameters), and provide both pretrained and fine-tuned checkpoints. Gemma outperforms similarly sized open models on 11 out of 18 text-based tasks, and we present comprehensive evaluations of safety and responsibility aspects of the models, alongside a detailed description of model development. We believe the responsible release of LLMs is critical for improving the safety of frontier models, and for enabling the next wave of LLM innovations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.08295v4-abstract-full').style.display = 'none'; document.getElementById('2403.08295v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.05530">arXiv:2403.05530</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.05530">pdf</a>, <a href="https://arxiv.org/format/2403.05530">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Gemini 1.5: Unlocking multimodal understanding across millions of tokens of context </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gemini+Team"> Gemini Team</a>, <a href="/search/cs?searchtype=author&amp;query=Georgiev%2C+P">Petko Georgiev</a>, <a href="/search/cs?searchtype=author&amp;query=Lei%2C+V+I">Ving Ian Lei</a>, <a href="/search/cs?searchtype=author&amp;query=Burnell%2C+R">Ryan Burnell</a>, <a href="/search/cs?searchtype=author&amp;query=Bai%2C+L">Libin Bai</a>, <a href="/search/cs?searchtype=author&amp;query=Gulati%2C+A">Anmol Gulati</a>, <a href="/search/cs?searchtype=author&amp;query=Tanzer%2C+G">Garrett Tanzer</a>, <a href="/search/cs?searchtype=author&amp;query=Vincent%2C+D">Damien Vincent</a>, <a href="/search/cs?searchtype=author&amp;query=Pan%2C+Z">Zhufeng Pan</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Shibo Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Mariooryad%2C+S">Soroosh Mariooryad</a>, <a href="/search/cs?searchtype=author&amp;query=Ding%2C+Y">Yifan Ding</a>, <a href="/search/cs?searchtype=author&amp;query=Geng%2C+X">Xinyang Geng</a>, <a href="/search/cs?searchtype=author&amp;query=Alcober%2C+F">Fred Alcober</a>, <a href="/search/cs?searchtype=author&amp;query=Frostig%2C+R">Roy Frostig</a>, <a href="/search/cs?searchtype=author&amp;query=Omernick%2C+M">Mark Omernick</a>, <a href="/search/cs?searchtype=author&amp;query=Walker%2C+L">Lexi Walker</a>, <a href="/search/cs?searchtype=author&amp;query=Paduraru%2C+C">Cosmin Paduraru</a>, <a href="/search/cs?searchtype=author&amp;query=Sorokin%2C+C">Christina Sorokin</a>, <a href="/search/cs?searchtype=author&amp;query=Tacchetti%2C+A">Andrea Tacchetti</a>, <a href="/search/cs?searchtype=author&amp;query=Gaffney%2C+C">Colin Gaffney</a>, <a href="/search/cs?searchtype=author&amp;query=Daruki%2C+S">Samira Daruki</a>, <a href="/search/cs?searchtype=author&amp;query=Sercinoglu%2C+O">Olcan Sercinoglu</a>, <a href="/search/cs?searchtype=author&amp;query=Gleicher%2C+Z">Zach Gleicher</a>, <a href="/search/cs?searchtype=author&amp;query=Love%2C+J">Juliette Love</a> , et al. (1112 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.05530v5-abstract-short" style="display: inline;"> In this report, we introduce the Gemini 1.5 family of models, representing the next generation of highly compute-efficient multimodal models capable of recalling and reasoning over fine-grained information from millions of tokens of context, including multiple long documents and hours of video and audio. The family includes two new models: (1) an updated Gemini 1.5 Pro, which exceeds the February&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.05530v5-abstract-full').style.display = 'inline'; document.getElementById('2403.05530v5-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.05530v5-abstract-full" style="display: none;"> In this report, we introduce the Gemini 1.5 family of models, representing the next generation of highly compute-efficient multimodal models capable of recalling and reasoning over fine-grained information from millions of tokens of context, including multiple long documents and hours of video and audio. The family includes two new models: (1) an updated Gemini 1.5 Pro, which exceeds the February version on the great majority of capabilities and benchmarks; (2) Gemini 1.5 Flash, a more lightweight variant designed for efficiency with minimal regression in quality. Gemini 1.5 models achieve near-perfect recall on long-context retrieval tasks across modalities, improve the state-of-the-art in long-document QA, long-video QA and long-context ASR, and match or surpass Gemini 1.0 Ultra&#39;s state-of-the-art performance across a broad set of benchmarks. Studying the limits of Gemini 1.5&#39;s long-context ability, we find continued improvement in next-token prediction and near-perfect retrieval (&gt;99%) up to at least 10M tokens, a generational leap over existing models such as Claude 3.0 (200k) and GPT-4 Turbo (128k). Finally, we highlight real-world use cases, such as Gemini 1.5 collaborating with professionals on completing their tasks achieving 26 to 75% time savings across 10 different job categories, as well as surprising new capabilities of large language models at the frontier; when given a grammar manual for Kalamang, a language with fewer than 200 speakers worldwide, the model learns to translate English to Kalamang at a similar level to a person who learned from the same content. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.05530v5-abstract-full').style.display = 'none'; document.getElementById('2403.05530v5-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.11805">arXiv:2312.11805</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2312.11805">pdf</a>, <a href="https://arxiv.org/format/2312.11805">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Gemini: A Family of Highly Capable Multimodal Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gemini+Team"> Gemini Team</a>, <a href="/search/cs?searchtype=author&amp;query=Anil%2C+R">Rohan Anil</a>, <a href="/search/cs?searchtype=author&amp;query=Borgeaud%2C+S">Sebastian Borgeaud</a>, <a href="/search/cs?searchtype=author&amp;query=Alayrac%2C+J">Jean-Baptiste Alayrac</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+J">Jiahui Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Soricut%2C+R">Radu Soricut</a>, <a href="/search/cs?searchtype=author&amp;query=Schalkwyk%2C+J">Johan Schalkwyk</a>, <a href="/search/cs?searchtype=author&amp;query=Dai%2C+A+M">Andrew M. Dai</a>, <a href="/search/cs?searchtype=author&amp;query=Hauth%2C+A">Anja Hauth</a>, <a href="/search/cs?searchtype=author&amp;query=Millican%2C+K">Katie Millican</a>, <a href="/search/cs?searchtype=author&amp;query=Silver%2C+D">David Silver</a>, <a href="/search/cs?searchtype=author&amp;query=Johnson%2C+M">Melvin Johnson</a>, <a href="/search/cs?searchtype=author&amp;query=Antonoglou%2C+I">Ioannis Antonoglou</a>, <a href="/search/cs?searchtype=author&amp;query=Schrittwieser%2C+J">Julian Schrittwieser</a>, <a href="/search/cs?searchtype=author&amp;query=Glaese%2C+A">Amelia Glaese</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Jilin Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Pitler%2C+E">Emily Pitler</a>, <a href="/search/cs?searchtype=author&amp;query=Lillicrap%2C+T">Timothy Lillicrap</a>, <a href="/search/cs?searchtype=author&amp;query=Lazaridou%2C+A">Angeliki Lazaridou</a>, <a href="/search/cs?searchtype=author&amp;query=Firat%2C+O">Orhan Firat</a>, <a href="/search/cs?searchtype=author&amp;query=Molloy%2C+J">James Molloy</a>, <a href="/search/cs?searchtype=author&amp;query=Isard%2C+M">Michael Isard</a>, <a href="/search/cs?searchtype=author&amp;query=Barham%2C+P+R">Paul R. Barham</a>, <a href="/search/cs?searchtype=author&amp;query=Hennigan%2C+T">Tom Hennigan</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+B">Benjamin Lee</a> , et al. (1325 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.11805v4-abstract-short" style="display: inline;"> This report introduces a new family of multimodal models, Gemini, that exhibit remarkable capabilities across image, audio, video, and text understanding. The Gemini family consists of Ultra, Pro, and Nano sizes, suitable for applications ranging from complex reasoning tasks to on-device memory-constrained use-cases. Evaluation on a broad range of benchmarks shows that our most-capable Gemini Ultr&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.11805v4-abstract-full').style.display = 'inline'; document.getElementById('2312.11805v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.11805v4-abstract-full" style="display: none;"> This report introduces a new family of multimodal models, Gemini, that exhibit remarkable capabilities across image, audio, video, and text understanding. The Gemini family consists of Ultra, Pro, and Nano sizes, suitable for applications ranging from complex reasoning tasks to on-device memory-constrained use-cases. Evaluation on a broad range of benchmarks shows that our most-capable Gemini Ultra model advances the state of the art in 30 of 32 of these benchmarks - notably being the first model to achieve human-expert performance on the well-studied exam benchmark MMLU, and improving the state of the art in every one of the 20 multimodal benchmarks we examined. We believe that the new capabilities of the Gemini family in cross-modal reasoning and language understanding will enable a wide variety of use cases. We discuss our approach toward post-training and deploying Gemini models responsibly to users through services including Gemini, Gemini Advanced, Google AI Studio, and Cloud Vertex AI. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.11805v4-abstract-full').style.display = 'none'; document.getElementById('2312.11805v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.10171">arXiv:2306.10171</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.10171">pdf</a>, <a href="https://arxiv.org/format/2306.10171">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Bootstrapped Representations in Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lan%2C+C+L">Charline Le Lan</a>, <a href="/search/cs?searchtype=author&amp;query=Tu%2C+S">Stephen Tu</a>, <a href="/search/cs?searchtype=author&amp;query=Rowland%2C+M">Mark Rowland</a>, <a href="/search/cs?searchtype=author&amp;query=Harutyunyan%2C+A">Anna Harutyunyan</a>, <a href="/search/cs?searchtype=author&amp;query=Agarwal%2C+R">Rishabh Agarwal</a>, <a href="/search/cs?searchtype=author&amp;query=Bellemare%2C+M+G">Marc G. Bellemare</a>, <a href="/search/cs?searchtype=author&amp;query=Dabney%2C+W">Will Dabney</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.10171v1-abstract-short" style="display: inline;"> In reinforcement learning (RL), state representations are key to dealing with large or continuous state spaces. While one of the promises of deep learning algorithms is to automatically construct features well-tuned for the task they try to solve, such a representation might not emerge from end-to-end training of deep RL agents. To mitigate this issue, auxiliary objectives are often incorporated i&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.10171v1-abstract-full').style.display = 'inline'; document.getElementById('2306.10171v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.10171v1-abstract-full" style="display: none;"> In reinforcement learning (RL), state representations are key to dealing with large or continuous state spaces. While one of the promises of deep learning algorithms is to automatically construct features well-tuned for the task they try to solve, such a representation might not emerge from end-to-end training of deep RL agents. To mitigate this issue, auxiliary objectives are often incorporated into the learning process and help shape the learnt state representation. Bootstrapping methods are today&#39;s method of choice to make these additional predictions. Yet, it is unclear which features these algorithms capture and how they relate to those from other auxiliary-task-based approaches. In this paper, we address this gap and provide a theoretical characterization of the state representation learnt by temporal difference learning (Sutton, 1988). Surprisingly, we find that this representation differs from the features learned by Monte Carlo and residual gradient algorithms for most transition structures of the environment in the policy evaluation setting. We describe the efficacy of these representations for policy evaluation, and use our theoretical analysis to design new auxiliary learning rules. We complement our theoretical results with an empirical comparison of these learning rules for different cumulant functions on classic domains such as the four-room domain (Sutton et al, 1999) and Mountain Car (Moore, 1990). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.10171v1-abstract-full').style.display = 'none'; document.getElementById('2306.10171v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICML 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.12567">arXiv:2304.12567</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2304.12567">pdf</a>, <a href="https://arxiv.org/format/2304.12567">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Proto-Value Networks: Scaling Representation Learning with Auxiliary Tasks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Farebrother%2C+J">Jesse Farebrother</a>, <a href="/search/cs?searchtype=author&amp;query=Greaves%2C+J">Joshua Greaves</a>, <a href="/search/cs?searchtype=author&amp;query=Agarwal%2C+R">Rishabh Agarwal</a>, <a href="/search/cs?searchtype=author&amp;query=Lan%2C+C+L">Charline Le Lan</a>, <a href="/search/cs?searchtype=author&amp;query=Goroshin%2C+R">Ross Goroshin</a>, <a href="/search/cs?searchtype=author&amp;query=Castro%2C+P+S">Pablo Samuel Castro</a>, <a href="/search/cs?searchtype=author&amp;query=Bellemare%2C+M+G">Marc G. Bellemare</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.12567v1-abstract-short" style="display: inline;"> Auxiliary tasks improve the representations learned by deep reinforcement learning agents. Analytically, their effect is reasonably well understood; in practice, however, their primary use remains in support of a main learning objective, rather than as a method for learning representations. This is perhaps surprising given that many auxiliary tasks are defined procedurally, and hence can be treate&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.12567v1-abstract-full').style.display = 'inline'; document.getElementById('2304.12567v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.12567v1-abstract-full" style="display: none;"> Auxiliary tasks improve the representations learned by deep reinforcement learning agents. Analytically, their effect is reasonably well understood; in practice, however, their primary use remains in support of a main learning objective, rather than as a method for learning representations. This is perhaps surprising given that many auxiliary tasks are defined procedurally, and hence can be treated as an essentially infinite source of information about the environment. Based on this observation, we study the effectiveness of auxiliary tasks for learning rich representations, focusing on the setting where the number of tasks and the size of the agent&#39;s network are simultaneously increased. For this purpose, we derive a new family of auxiliary tasks based on the successor measure. These tasks are easy to implement and have appealing theoretical properties. Combined with a suitable off-policy learning rule, the result is a representation learning algorithm that can be understood as extending Mahadevan &amp; Maggioni (2007)&#39;s proto-value functions to deep reinforcement learning -- accordingly, we call the resulting object proto-value networks. Through a series of experiments on the Arcade Learning Environment, we demonstrate that proto-value networks produce rich features that may be used to obtain performance comparable to established algorithms, using only linear approximation and a small number (~4M) of interactions with the environment&#39;s reward function. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.12567v1-abstract-full').style.display = 'none'; document.getElementById('2304.12567v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR 2023. Code and models are available at https://github.com/google-research/google-research/tree/master/pvn 22 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.04025">arXiv:2212.04025</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2212.04025">pdf</a>, <a href="https://arxiv.org/format/2212.04025">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> A Novel Stochastic Gradient Descent Algorithm for Learning Principal Subspaces </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lan%2C+C+L">Charline Le Lan</a>, <a href="/search/cs?searchtype=author&amp;query=Greaves%2C+J">Joshua Greaves</a>, <a href="/search/cs?searchtype=author&amp;query=Farebrother%2C+J">Jesse Farebrother</a>, <a href="/search/cs?searchtype=author&amp;query=Rowland%2C+M">Mark Rowland</a>, <a href="/search/cs?searchtype=author&amp;query=Pedregosa%2C+F">Fabian Pedregosa</a>, <a href="/search/cs?searchtype=author&amp;query=Agarwal%2C+R">Rishabh Agarwal</a>, <a href="/search/cs?searchtype=author&amp;query=Bellemare%2C+M+G">Marc G. Bellemare</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.04025v1-abstract-short" style="display: inline;"> Many machine learning problems encode their data as a matrix with a possibly very large number of rows and columns. In several applications like neuroscience, image compression or deep reinforcement learning, the principal subspace of such a matrix provides a useful, low-dimensional representation of individual data. Here, we are interested in determining the $d$-dimensional principal subspace of&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.04025v1-abstract-full').style.display = 'inline'; document.getElementById('2212.04025v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.04025v1-abstract-full" style="display: none;"> Many machine learning problems encode their data as a matrix with a possibly very large number of rows and columns. In several applications like neuroscience, image compression or deep reinforcement learning, the principal subspace of such a matrix provides a useful, low-dimensional representation of individual data. Here, we are interested in determining the $d$-dimensional principal subspace of a given matrix from sample entries, i.e. from small random submatrices. Although a number of sample-based methods exist for this problem (e.g. Oja&#39;s rule \citep{oja1982simplified}), these assume access to full columns of the matrix or particular matrix structure such as symmetry and cannot be combined as-is with neural networks \citep{baldi1989neural}. In this paper, we derive an algorithm that learns a principal subspace from sample entries, can be applied when the approximate subspace is represented by a neural network, and hence can be scaled to datasets with an effectively infinite number of rows and columns. Our method consists in defining a loss function whose minimizer is the desired principal subspace, and constructing a gradient estimate of this loss whose bias can be controlled. We complement our theoretical analysis with a series of experiments on synthetic matrices, the MNIST dataset \citep{lecun2010mnist} and the reinforcement learning domain PuddleWorld \citep{sutton1995generalization} demonstrating the usefulness of our approach. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.04025v1-abstract-full').style.display = 'none'; document.getElementById('2212.04025v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages in main content, 2 pages of bibliography and 5 pages in Appendix</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.03319">arXiv:2212.03319</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2212.03319">pdf</a>, <a href="https://arxiv.org/format/2212.03319">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Understanding Self-Predictive Learning for Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Tang%2C+Y">Yunhao Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+Z+D">Zhaohan Daniel Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Richemond%2C+P+H">Pierre Harvey Richemond</a>, <a href="/search/cs?searchtype=author&amp;query=Pires%2C+B+%C3%81">Bernardo 脕vila Pires</a>, <a href="/search/cs?searchtype=author&amp;query=Chandak%2C+Y">Yash Chandak</a>, <a href="/search/cs?searchtype=author&amp;query=Munos%2C+R">R茅mi Munos</a>, <a href="/search/cs?searchtype=author&amp;query=Rowland%2C+M">Mark Rowland</a>, <a href="/search/cs?searchtype=author&amp;query=Azar%2C+M+G">Mohammad Gheshlaghi Azar</a>, <a href="/search/cs?searchtype=author&amp;query=Lan%2C+C+L">Charline Le Lan</a>, <a href="/search/cs?searchtype=author&amp;query=Lyle%2C+C">Clare Lyle</a>, <a href="/search/cs?searchtype=author&amp;query=Gy%C3%B6rgy%2C+A">Andr谩s Gy枚rgy</a>, <a href="/search/cs?searchtype=author&amp;query=Thakoor%2C+S">Shantanu Thakoor</a>, <a href="/search/cs?searchtype=author&amp;query=Dabney%2C+W">Will Dabney</a>, <a href="/search/cs?searchtype=author&amp;query=Piot%2C+B">Bilal Piot</a>, <a href="/search/cs?searchtype=author&amp;query=Calandriello%2C+D">Daniele Calandriello</a>, <a href="/search/cs?searchtype=author&amp;query=Valko%2C+M">Michal Valko</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.03319v1-abstract-short" style="display: inline;"> We study the learning dynamics of self-predictive learning for reinforcement learning, a family of algorithms that learn representations by minimizing the prediction error of their own future latent representations. Despite its recent empirical success, such algorithms have an apparent defect: trivial representations (such as constants) minimize the prediction error, yet it is obviously undesirabl&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.03319v1-abstract-full').style.display = 'inline'; document.getElementById('2212.03319v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.03319v1-abstract-full" style="display: none;"> We study the learning dynamics of self-predictive learning for reinforcement learning, a family of algorithms that learn representations by minimizing the prediction error of their own future latent representations. Despite its recent empirical success, such algorithms have an apparent defect: trivial representations (such as constants) minimize the prediction error, yet it is obviously undesirable to converge to such solutions. Our central insight is that careful designs of the optimization dynamics are critical to learning meaningful representations. We identify that a faster paced optimization of the predictor and semi-gradient updates on the representation, are crucial to preventing the representation collapse. Then in an idealized setup, we show self-predictive learning dynamics carries out spectral decomposition on the state transition matrix, effectively capturing information of the transition dynamics. Building on the theoretical insights, we propose bidirectional self-predictive learning, a novel self-predictive algorithm that learns two representations simultaneously. We examine the robustness of our theoretical insights with a number of small-scale experiments and showcase the promise of the novel representation learning algorithm with large-scale experiments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.03319v1-abstract-full').style.display = 'none'; document.getElementById('2212.03319v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.00543">arXiv:2203.00543</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2203.00543">pdf</a>, <a href="https://arxiv.org/format/2203.00543">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> On the Generalization of Representations in Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lan%2C+C+L">Charline Le Lan</a>, <a href="/search/cs?searchtype=author&amp;query=Tu%2C+S">Stephen Tu</a>, <a href="/search/cs?searchtype=author&amp;query=Oberman%2C+A">Adam Oberman</a>, <a href="/search/cs?searchtype=author&amp;query=Agarwal%2C+R">Rishabh Agarwal</a>, <a href="/search/cs?searchtype=author&amp;query=Bellemare%2C+M+G">Marc G. Bellemare</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.00543v1-abstract-short" style="display: inline;"> In reinforcement learning, state representations are used to tractably deal with large problem spaces. State representations serve both to approximate the value function with few parameters, but also to generalize to newly encountered states. Their features may be learned implicitly (as part of a neural network) or explicitly (for example, the successor representation of \citet{dayan1993improving}&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.00543v1-abstract-full').style.display = 'inline'; document.getElementById('2203.00543v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.00543v1-abstract-full" style="display: none;"> In reinforcement learning, state representations are used to tractably deal with large problem spaces. State representations serve both to approximate the value function with few parameters, but also to generalize to newly encountered states. Their features may be learned implicitly (as part of a neural network) or explicitly (for example, the successor representation of \citet{dayan1993improving}). While the approximation properties of representations are reasonably well-understood, a precise characterization of how and when these representations generalize is lacking. In this work, we address this gap and provide an informative bound on the generalization error arising from a specific state representation. This bound is based on the notion of effective dimension which measures the degree to which knowing the value at one state informs the value at other states. Our bound applies to any state representation and quantifies the natural tension between representations that generalize well and those that approximate well. We complement our theoretical results with an empirical survey of classic representation learning methods from the literature and results on the Arcade Learning Environment, and find that the generalization behaviour of learned representations is well-explained by their effective dimension. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.00543v1-abstract-full').style.display = 'none'; document.getElementById('2203.00543v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at AISTATS22</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2102.01514">arXiv:2102.01514</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2102.01514">pdf</a>, <a href="https://arxiv.org/format/2102.01514">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Metrics and continuity in reinforcement learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lan%2C+C+L">Charline Le Lan</a>, <a href="/search/cs?searchtype=author&amp;query=Bellemare%2C+M+G">Marc G. Bellemare</a>, <a href="/search/cs?searchtype=author&amp;query=Castro%2C+P+S">Pablo Samuel Castro</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2102.01514v1-abstract-short" style="display: inline;"> In most practical applications of reinforcement learning, it is untenable to maintain direct estimates for individual states; in continuous-state systems, it is impossible. Instead, researchers often leverage state similarity (whether explicitly or implicitly) to build models that can generalize well from a limited set of samples. The notion of state similarity used, and the neighbourhoods and top&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.01514v1-abstract-full').style.display = 'inline'; document.getElementById('2102.01514v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2102.01514v1-abstract-full" style="display: none;"> In most practical applications of reinforcement learning, it is untenable to maintain direct estimates for individual states; in continuous-state systems, it is impossible. Instead, researchers often leverage state similarity (whether explicitly or implicitly) to build models that can generalize well from a limited set of samples. The notion of state similarity used, and the neighbourhoods and topologies they induce, is thus of crucial importance, as it will directly affect the performance of the algorithms. Indeed, a number of recent works introduce algorithms assuming the existence of &#34;well-behaved&#34; neighbourhoods, but leave the full specification of such topologies for future work. In this paper we introduce a unified formalism for defining these topologies through the lens of metrics. We establish a hierarchy amongst these metrics and demonstrate their theoretical implications on the Markov Decision Process specifying the reinforcement learning problem. We complement our theoretical results with empirical evaluations showcasing the differences between the metrics considered. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.01514v1-abstract-full').style.display = 'none'; document.getElementById('2102.01514v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at AAAI 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2012.10885">arXiv:2012.10885</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2012.10885">pdf</a>, <a href="https://arxiv.org/format/2012.10885">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> LieTransformer: Equivariant self-attention for Lie Groups </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Hutchinson%2C+M">Michael Hutchinson</a>, <a href="/search/cs?searchtype=author&amp;query=Lan%2C+C+L">Charline Le Lan</a>, <a href="/search/cs?searchtype=author&amp;query=Zaidi%2C+S">Sheheryar Zaidi</a>, <a href="/search/cs?searchtype=author&amp;query=Dupont%2C+E">Emilien Dupont</a>, <a href="/search/cs?searchtype=author&amp;query=Teh%2C+Y+W">Yee Whye Teh</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+H">Hyunjik Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2012.10885v4-abstract-short" style="display: inline;"> Group equivariant neural networks are used as building blocks of group invariant neural networks, which have been shown to improve generalisation performance and data efficiency through principled parameter sharing. Such works have mostly focused on group equivariant convolutions, building on the result that group equivariant linear maps are necessarily convolutions. In this work, we extend the sc&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2012.10885v4-abstract-full').style.display = 'inline'; document.getElementById('2012.10885v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2012.10885v4-abstract-full" style="display: none;"> Group equivariant neural networks are used as building blocks of group invariant neural networks, which have been shown to improve generalisation performance and data efficiency through principled parameter sharing. Such works have mostly focused on group equivariant convolutions, building on the result that group equivariant linear maps are necessarily convolutions. In this work, we extend the scope of the literature to self-attention, that is emerging as a prominent building block of deep learning models. We propose the LieTransformer, an architecture composed of LieSelfAttention layers that are equivariant to arbitrary Lie groups and their discrete subgroups. We demonstrate the generality of our approach by showing experimental results that are competitive to baseline methods on a wide range of tasks: shape counting on point clouds, molecular property regression and modelling particle trajectories under Hamiltonian dynamics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2012.10885v4-abstract-full').style.display = 'none'; document.getElementById('2012.10885v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 December, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2012.03808">arXiv:2012.03808</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2012.03808">pdf</a>, <a href="https://arxiv.org/format/2012.03808">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.3390/e23121690">10.3390/e23121690 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Perfect density models cannot guarantee anomaly detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lan%2C+C+L">Charline Le Lan</a>, <a href="/search/cs?searchtype=author&amp;query=Dinh%2C+L">Laurent Dinh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2012.03808v3-abstract-short" style="display: inline;"> Thanks to the tractability of their likelihood, several deep generative models show promise for seemingly straightforward but important applications like anomaly detection, uncertainty estimation, and active learning. However, the likelihood values empirically attributed to anomalies conflict with the expectations these proposed applications suggest. In this paper, we take a closer look at the beh&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2012.03808v3-abstract-full').style.display = 'inline'; document.getElementById('2012.03808v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2012.03808v3-abstract-full" style="display: none;"> Thanks to the tractability of their likelihood, several deep generative models show promise for seemingly straightforward but important applications like anomaly detection, uncertainty estimation, and active learning. However, the likelihood values empirically attributed to anomalies conflict with the expectations these proposed applications suggest. In this paper, we take a closer look at the behavior of distribution densities through the lens of reparametrization and show that these quantities carry less meaningful information than previously thought, beyond estimation issues or the curse of dimensionality. We conclude that the use of these likelihoods for anomaly detection relies on strong and implicit hypotheses, and highlight the necessity of explicitly formulating these assumptions for reliable anomaly detection. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2012.03808v3-abstract-full').style.display = 'none'; document.getElementById('2012.03808v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 January, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 December, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to the Special Issue &#34;Probabilistic Methods for Deep Learning&#34; of the Journal Entropy. 14 pages and 10 figures in main content, 4 pages of bibliography, and 2 pages in Appendix</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Entropy 23 (2021) 1690 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1901.06033">arXiv:1901.06033</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1901.06033">pdf</a>, <a href="https://arxiv.org/format/1901.06033">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Continuous Hierarchical Representations with Poincar茅 Variational Auto-Encoders </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Mathieu%2C+E">Emile Mathieu</a>, <a href="/search/cs?searchtype=author&amp;query=Lan%2C+C+L">Charline Le Lan</a>, <a href="/search/cs?searchtype=author&amp;query=Maddison%2C+C+J">Chris J. Maddison</a>, <a href="/search/cs?searchtype=author&amp;query=Tomioka%2C+R">Ryota Tomioka</a>, <a href="/search/cs?searchtype=author&amp;query=Teh%2C+Y+W">Yee Whye Teh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1901.06033v3-abstract-short" style="display: inline;"> The variational auto-encoder (VAE) is a popular method for learning a generative model and embeddings of the data. Many real datasets are hierarchically structured. However, traditional VAEs map data in a Euclidean latent space which cannot efficiently embed tree-like structures. Hyperbolic spaces with negative curvature can. We therefore endow VAEs with a Poincar茅 ball model of hyperbolic geometr&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1901.06033v3-abstract-full').style.display = 'inline'; document.getElementById('1901.06033v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1901.06033v3-abstract-full" style="display: none;"> The variational auto-encoder (VAE) is a popular method for learning a generative model and embeddings of the data. Many real datasets are hierarchically structured. However, traditional VAEs map data in a Euclidean latent space which cannot efficiently embed tree-like structures. Hyperbolic spaces with negative curvature can. We therefore endow VAEs with a Poincar茅 ball model of hyperbolic geometry as a latent space and rigorously derive the necessary methods to work with two main Gaussian generalisations on that space. We empirically show better generalisation to unseen data than the Euclidean counterpart, and can qualitatively and quantitatively better recover hierarchical structures. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1901.06033v3-abstract-full').style.display = 'none'; document.getElementById('1901.06033v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 November, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 January, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Advances in Neural Information Processing Systems</span> </p> </li> </ol> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10