Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–19 of 19 results for author: <span class="mathjax">Ozair, S</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Ozair%2C+S">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Ozair, S"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Ozair%2C+S&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Ozair, S"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.15391">arXiv:2402.15391</a> <span> [<a href="https://arxiv.org/pdf/2402.15391">pdf</a>, <a href="https://arxiv.org/format/2402.15391">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Genie: Generative Interactive Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bruce%2C+J">Jake Bruce</a>, <a href="/search/cs?searchtype=author&query=Dennis%2C+M">Michael Dennis</a>, <a href="/search/cs?searchtype=author&query=Edwards%2C+A">Ashley Edwards</a>, <a href="/search/cs?searchtype=author&query=Parker-Holder%2C+J">Jack Parker-Holder</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+Y">Yuge Shi</a>, <a href="/search/cs?searchtype=author&query=Hughes%2C+E">Edward Hughes</a>, <a href="/search/cs?searchtype=author&query=Lai%2C+M">Matthew Lai</a>, <a href="/search/cs?searchtype=author&query=Mavalankar%2C+A">Aditi Mavalankar</a>, <a href="/search/cs?searchtype=author&query=Steigerwald%2C+R">Richie Steigerwald</a>, <a href="/search/cs?searchtype=author&query=Apps%2C+C">Chris Apps</a>, <a href="/search/cs?searchtype=author&query=Aytar%2C+Y">Yusuf Aytar</a>, <a href="/search/cs?searchtype=author&query=Bechtle%2C+S">Sarah Bechtle</a>, <a href="/search/cs?searchtype=author&query=Behbahani%2C+F">Feryal Behbahani</a>, <a href="/search/cs?searchtype=author&query=Chan%2C+S">Stephanie Chan</a>, <a href="/search/cs?searchtype=author&query=Heess%2C+N">Nicolas Heess</a>, <a href="/search/cs?searchtype=author&query=Gonzalez%2C+L">Lucy Gonzalez</a>, <a href="/search/cs?searchtype=author&query=Osindero%2C+S">Simon Osindero</a>, <a href="/search/cs?searchtype=author&query=Ozair%2C+S">Sherjil Ozair</a>, <a href="/search/cs?searchtype=author&query=Reed%2C+S">Scott Reed</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jingwei Zhang</a>, <a href="/search/cs?searchtype=author&query=Zolna%2C+K">Konrad Zolna</a>, <a href="/search/cs?searchtype=author&query=Clune%2C+J">Jeff Clune</a>, <a href="/search/cs?searchtype=author&query=de+Freitas%2C+N">Nando de Freitas</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+S">Satinder Singh</a>, <a href="/search/cs?searchtype=author&query=Rockt%C3%A4schel%2C+T">Tim Rockt盲schel</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.15391v1-abstract-short" style="display: inline;"> We introduce Genie, the first generative interactive environment trained in an unsupervised manner from unlabelled Internet videos. The model can be prompted to generate an endless variety of action-controllable virtual worlds described through text, synthetic images, photographs, and even sketches. At 11B parameters, Genie can be considered a foundation world model. It is comprised of a spatiotem… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.15391v1-abstract-full').style.display = 'inline'; document.getElementById('2402.15391v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.15391v1-abstract-full" style="display: none;"> We introduce Genie, the first generative interactive environment trained in an unsupervised manner from unlabelled Internet videos. The model can be prompted to generate an endless variety of action-controllable virtual worlds described through text, synthetic images, photographs, and even sketches. At 11B parameters, Genie can be considered a foundation world model. It is comprised of a spatiotemporal video tokenizer, an autoregressive dynamics model, and a simple and scalable latent action model. Genie enables users to act in the generated environments on a frame-by-frame basis despite training without any ground-truth action labels or other domain-specific requirements typically found in the world model literature. Further the resulting learned latent action space facilitates training agents to imitate behaviors from unseen videos, opening the path for training generalist agents of the future. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.15391v1-abstract-full').style.display = 'none'; document.getElementById('2402.15391v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">https://sites.google.com/corp/view/genie-2024/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.11805">arXiv:2312.11805</a> <span> [<a href="https://arxiv.org/pdf/2312.11805">pdf</a>, <a href="https://arxiv.org/format/2312.11805">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Gemini: A Family of Highly Capable Multimodal Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gemini+Team"> Gemini Team</a>, <a href="/search/cs?searchtype=author&query=Anil%2C+R">Rohan Anil</a>, <a href="/search/cs?searchtype=author&query=Borgeaud%2C+S">Sebastian Borgeaud</a>, <a href="/search/cs?searchtype=author&query=Alayrac%2C+J">Jean-Baptiste Alayrac</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+J">Jiahui Yu</a>, <a href="/search/cs?searchtype=author&query=Soricut%2C+R">Radu Soricut</a>, <a href="/search/cs?searchtype=author&query=Schalkwyk%2C+J">Johan Schalkwyk</a>, <a href="/search/cs?searchtype=author&query=Dai%2C+A+M">Andrew M. Dai</a>, <a href="/search/cs?searchtype=author&query=Hauth%2C+A">Anja Hauth</a>, <a href="/search/cs?searchtype=author&query=Millican%2C+K">Katie Millican</a>, <a href="/search/cs?searchtype=author&query=Silver%2C+D">David Silver</a>, <a href="/search/cs?searchtype=author&query=Johnson%2C+M">Melvin Johnson</a>, <a href="/search/cs?searchtype=author&query=Antonoglou%2C+I">Ioannis Antonoglou</a>, <a href="/search/cs?searchtype=author&query=Schrittwieser%2C+J">Julian Schrittwieser</a>, <a href="/search/cs?searchtype=author&query=Glaese%2C+A">Amelia Glaese</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jilin Chen</a>, <a href="/search/cs?searchtype=author&query=Pitler%2C+E">Emily Pitler</a>, <a href="/search/cs?searchtype=author&query=Lillicrap%2C+T">Timothy Lillicrap</a>, <a href="/search/cs?searchtype=author&query=Lazaridou%2C+A">Angeliki Lazaridou</a>, <a href="/search/cs?searchtype=author&query=Firat%2C+O">Orhan Firat</a>, <a href="/search/cs?searchtype=author&query=Molloy%2C+J">James Molloy</a>, <a href="/search/cs?searchtype=author&query=Isard%2C+M">Michael Isard</a>, <a href="/search/cs?searchtype=author&query=Barham%2C+P+R">Paul R. Barham</a>, <a href="/search/cs?searchtype=author&query=Hennigan%2C+T">Tom Hennigan</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+B">Benjamin Lee</a> , et al. (1325 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.11805v4-abstract-short" style="display: inline;"> This report introduces a new family of multimodal models, Gemini, that exhibit remarkable capabilities across image, audio, video, and text understanding. The Gemini family consists of Ultra, Pro, and Nano sizes, suitable for applications ranging from complex reasoning tasks to on-device memory-constrained use-cases. Evaluation on a broad range of benchmarks shows that our most-capable Gemini Ultr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.11805v4-abstract-full').style.display = 'inline'; document.getElementById('2312.11805v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.11805v4-abstract-full" style="display: none;"> This report introduces a new family of multimodal models, Gemini, that exhibit remarkable capabilities across image, audio, video, and text understanding. The Gemini family consists of Ultra, Pro, and Nano sizes, suitable for applications ranging from complex reasoning tasks to on-device memory-constrained use-cases. Evaluation on a broad range of benchmarks shows that our most-capable Gemini Ultra model advances the state of the art in 30 of 32 of these benchmarks - notably being the first model to achieve human-expert performance on the well-studied exam benchmark MMLU, and improving the state of the art in every one of the 20 multimodal benchmarks we examined. We believe that the new capabilities of the Gemini family in cross-modal reasoning and language understanding will enable a wide variety of use cases. We discuss our approach toward post-training and deploying Gemini models responsibly to users through services including Gemini, Gemini Advanced, Google AI Studio, and Cloud Vertex AI. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.11805v4-abstract-full').style.display = 'none'; document.getElementById('2312.11805v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.03526">arXiv:2308.03526</a> <span> [<a href="https://arxiv.org/pdf/2308.03526">pdf</a>, <a href="https://arxiv.org/format/2308.03526">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> AlphaStar Unplugged: Large-Scale Offline Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mathieu%2C+M">Micha毛l Mathieu</a>, <a href="/search/cs?searchtype=author&query=Ozair%2C+S">Sherjil Ozair</a>, <a href="/search/cs?searchtype=author&query=Srinivasan%2C+S">Srivatsan Srinivasan</a>, <a href="/search/cs?searchtype=author&query=Gulcehre%2C+C">Caglar Gulcehre</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shangtong Zhang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+R">Ray Jiang</a>, <a href="/search/cs?searchtype=author&query=Paine%2C+T+L">Tom Le Paine</a>, <a href="/search/cs?searchtype=author&query=Powell%2C+R">Richard Powell</a>, <a href="/search/cs?searchtype=author&query=%C5%BBo%C5%82na%2C+K">Konrad 呕o艂na</a>, <a href="/search/cs?searchtype=author&query=Schrittwieser%2C+J">Julian Schrittwieser</a>, <a href="/search/cs?searchtype=author&query=Choi%2C+D">David Choi</a>, <a href="/search/cs?searchtype=author&query=Georgiev%2C+P">Petko Georgiev</a>, <a href="/search/cs?searchtype=author&query=Toyama%2C+D">Daniel Toyama</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+A">Aja Huang</a>, <a href="/search/cs?searchtype=author&query=Ring%2C+R">Roman Ring</a>, <a href="/search/cs?searchtype=author&query=Babuschkin%2C+I">Igor Babuschkin</a>, <a href="/search/cs?searchtype=author&query=Ewalds%2C+T">Timo Ewalds</a>, <a href="/search/cs?searchtype=author&query=Bordbar%2C+M">Mahyar Bordbar</a>, <a href="/search/cs?searchtype=author&query=Henderson%2C+S">Sarah Henderson</a>, <a href="/search/cs?searchtype=author&query=Colmenarejo%2C+S+G">Sergio G贸mez Colmenarejo</a>, <a href="/search/cs?searchtype=author&query=Oord%2C+A+v+d">A盲ron van den Oord</a>, <a href="/search/cs?searchtype=author&query=Czarnecki%2C+W+M">Wojciech Marian Czarnecki</a>, <a href="/search/cs?searchtype=author&query=de+Freitas%2C+N">Nando de Freitas</a>, <a href="/search/cs?searchtype=author&query=Vinyals%2C+O">Oriol Vinyals</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.03526v1-abstract-short" style="display: inline;"> StarCraft II is one of the most challenging simulated reinforcement learning environments; it is partially observable, stochastic, multi-agent, and mastering StarCraft II requires strategic planning over long time horizons with real-time low-level execution. It also has an active professional competitive scene. StarCraft II is uniquely suited for advancing offline RL algorithms, both because of it… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.03526v1-abstract-full').style.display = 'inline'; document.getElementById('2308.03526v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.03526v1-abstract-full" style="display: none;"> StarCraft II is one of the most challenging simulated reinforcement learning environments; it is partially observable, stochastic, multi-agent, and mastering StarCraft II requires strategic planning over long time horizons with real-time low-level execution. It also has an active professional competitive scene. StarCraft II is uniquely suited for advancing offline RL algorithms, both because of its challenging nature and because Blizzard has released a massive dataset of millions of StarCraft II games played by human players. This paper leverages that and establishes a benchmark, called AlphaStar Unplugged, introducing unprecedented challenges for offline reinforcement learning. We define a dataset (a subset of Blizzard's release), tools standardizing an API for machine learning methods, and an evaluation protocol. We also present baseline agents, including behavior cloning, offline variants of actor-critic and MuZero. We improve the state of the art of agents using only offline data, and we achieve 90% win rate against previously published AlphaStar behavior cloning agent. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.03526v1-abstract-full').style.display = 'none'; document.getElementById('2308.03526v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">32 pages, 13 figures, previous version published as a NeurIPS 2021 workshop: https://openreview.net/forum?id=Np8Pumfoty</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.15378">arXiv:2206.15378</a> <span> [<a href="https://arxiv.org/pdf/2206.15378">pdf</a>, <a href="https://arxiv.org/format/2206.15378">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1126/science.add4679">10.1126/science.add4679 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Mastering the Game of Stratego with Model-Free Multiagent Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Perolat%2C+J">Julien Perolat</a>, <a href="/search/cs?searchtype=author&query=de+Vylder%2C+B">Bart de Vylder</a>, <a href="/search/cs?searchtype=author&query=Hennes%2C+D">Daniel Hennes</a>, <a href="/search/cs?searchtype=author&query=Tarassov%2C+E">Eugene Tarassov</a>, <a href="/search/cs?searchtype=author&query=Strub%2C+F">Florian Strub</a>, <a href="/search/cs?searchtype=author&query=de+Boer%2C+V">Vincent de Boer</a>, <a href="/search/cs?searchtype=author&query=Muller%2C+P">Paul Muller</a>, <a href="/search/cs?searchtype=author&query=Connor%2C+J+T">Jerome T. Connor</a>, <a href="/search/cs?searchtype=author&query=Burch%2C+N">Neil Burch</a>, <a href="/search/cs?searchtype=author&query=Anthony%2C+T">Thomas Anthony</a>, <a href="/search/cs?searchtype=author&query=McAleer%2C+S">Stephen McAleer</a>, <a href="/search/cs?searchtype=author&query=Elie%2C+R">Romuald Elie</a>, <a href="/search/cs?searchtype=author&query=Cen%2C+S+H">Sarah H. Cen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhe Wang</a>, <a href="/search/cs?searchtype=author&query=Gruslys%2C+A">Audrunas Gruslys</a>, <a href="/search/cs?searchtype=author&query=Malysheva%2C+A">Aleksandra Malysheva</a>, <a href="/search/cs?searchtype=author&query=Khan%2C+M">Mina Khan</a>, <a href="/search/cs?searchtype=author&query=Ozair%2C+S">Sherjil Ozair</a>, <a href="/search/cs?searchtype=author&query=Timbers%2C+F">Finbarr Timbers</a>, <a href="/search/cs?searchtype=author&query=Pohlen%2C+T">Toby Pohlen</a>, <a href="/search/cs?searchtype=author&query=Eccles%2C+T">Tom Eccles</a>, <a href="/search/cs?searchtype=author&query=Rowland%2C+M">Mark Rowland</a>, <a href="/search/cs?searchtype=author&query=Lanctot%2C+M">Marc Lanctot</a>, <a href="/search/cs?searchtype=author&query=Lespiau%2C+J">Jean-Baptiste Lespiau</a>, <a href="/search/cs?searchtype=author&query=Piot%2C+B">Bilal Piot</a> , et al. (9 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.15378v1-abstract-short" style="display: inline;"> We introduce DeepNash, an autonomous agent capable of learning to play the imperfect information game Stratego from scratch, up to a human expert level. Stratego is one of the few iconic board games that Artificial Intelligence (AI) has not yet mastered. This popular game has an enormous game tree on the order of $10^{535}$ nodes, i.e., $10^{175}$ times larger than that of Go. It has the additiona… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.15378v1-abstract-full').style.display = 'inline'; document.getElementById('2206.15378v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.15378v1-abstract-full" style="display: none;"> We introduce DeepNash, an autonomous agent capable of learning to play the imperfect information game Stratego from scratch, up to a human expert level. Stratego is one of the few iconic board games that Artificial Intelligence (AI) has not yet mastered. This popular game has an enormous game tree on the order of $10^{535}$ nodes, i.e., $10^{175}$ times larger than that of Go. It has the additional complexity of requiring decision-making under imperfect information, similar to Texas hold'em poker, which has a significantly smaller game tree (on the order of $10^{164}$ nodes). Decisions in Stratego are made over a large number of discrete actions with no obvious link between action and outcome. Episodes are long, with often hundreds of moves before a player wins, and situations in Stratego can not easily be broken down into manageably-sized sub-problems as in poker. For these reasons, Stratego has been a grand challenge for the field of AI for decades, and existing AI methods barely reach an amateur level of play. DeepNash uses a game-theoretic, model-free deep reinforcement learning method, without search, that learns to master Stratego via self-play. The Regularised Nash Dynamics (R-NaD) algorithm, a key component of DeepNash, converges to an approximate Nash equilibrium, instead of 'cycling' around it, by directly modifying the underlying multi-agent learning dynamics. DeepNash beats existing state-of-the-art AI methods in Stratego and achieved a yearly (2022) and all-time top-3 rank on the Gravon games platform, competing with human expert players. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.15378v1-abstract-full').style.display = 'none'; document.getElementById('2206.15378v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2111.01587">arXiv:2111.01587</a> <span> [<a href="https://arxiv.org/pdf/2111.01587">pdf</a>, <a href="https://arxiv.org/format/2111.01587">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Procedural Generalization by Planning with Self-Supervised World Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Anand%2C+A">Ankesh Anand</a>, <a href="/search/cs?searchtype=author&query=Walker%2C+J">Jacob Walker</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yazhe Li</a>, <a href="/search/cs?searchtype=author&query=V%C3%A9rtes%2C+E">Eszter V茅rtes</a>, <a href="/search/cs?searchtype=author&query=Schrittwieser%2C+J">Julian Schrittwieser</a>, <a href="/search/cs?searchtype=author&query=Ozair%2C+S">Sherjil Ozair</a>, <a href="/search/cs?searchtype=author&query=Weber%2C+T">Th茅ophane Weber</a>, <a href="/search/cs?searchtype=author&query=Hamrick%2C+J+B">Jessica B. Hamrick</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2111.01587v1-abstract-short" style="display: inline;"> One of the key promises of model-based reinforcement learning is the ability to generalize using an internal model of the world to make predictions in novel environments and tasks. However, the generalization ability of model-based agents is not well understood because existing work has focused on model-free agents when benchmarking generalization. Here, we explicitly measure the generalization ab… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.01587v1-abstract-full').style.display = 'inline'; document.getElementById('2111.01587v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2111.01587v1-abstract-full" style="display: none;"> One of the key promises of model-based reinforcement learning is the ability to generalize using an internal model of the world to make predictions in novel environments and tasks. However, the generalization ability of model-based agents is not well understood because existing work has focused on model-free agents when benchmarking generalization. Here, we explicitly measure the generalization ability of model-based agents in comparison to their model-free counterparts. We focus our analysis on MuZero (Schrittwieser et al., 2020), a powerful model-based agent, and evaluate its performance on both procedural and task generalization. We identify three factors of procedural generalization -- planning, self-supervised representation learning, and procedural data diversity -- and show that by combining these techniques, we achieve state-of-the art generalization performance and data efficiency on Procgen (Cobbe et al., 2019). However, we find that these factors do not always provide the same benefits for the task generalization benchmarks in Meta-World (Yu et al., 2019), indicating that transfer remains a challenge and may require different approaches than procedural generalization. Overall, we suggest that building generalizable agents requires moving beyond the single-task, model-free paradigm and towards self-supervised model-based agents that are trained in rich, procedural, multi-task environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.01587v1-abstract-full').style.display = 'none'; document.getElementById('2111.01587v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.05139">arXiv:2106.05139</a> <span> [<a href="https://arxiv.org/pdf/2106.05139">pdf</a>, <a href="https://arxiv.org/format/2106.05139">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Pretrained Encoders are All You Need </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Khan%2C+M">Mina Khan</a>, <a href="/search/cs?searchtype=author&query=Srivatsa%2C+P">P Srivatsa</a>, <a href="/search/cs?searchtype=author&query=Rane%2C+A">Advait Rane</a>, <a href="/search/cs?searchtype=author&query=Chenniappa%2C+S">Shriram Chenniappa</a>, <a href="/search/cs?searchtype=author&query=Anand%2C+R">Rishabh Anand</a>, <a href="/search/cs?searchtype=author&query=Ozair%2C+S">Sherjil Ozair</a>, <a href="/search/cs?searchtype=author&query=Maes%2C+P">Pattie Maes</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.05139v1-abstract-short" style="display: inline;"> Data-efficiency and generalization are key challenges in deep learning and deep reinforcement learning as many models are trained on large-scale, domain-specific, and expensive-to-label datasets. Self-supervised models trained on large-scale uncurated datasets have shown successful transfer to diverse settings. We investigate using pretrained image representations and spatio-temporal attention for… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.05139v1-abstract-full').style.display = 'inline'; document.getElementById('2106.05139v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.05139v1-abstract-full" style="display: none;"> Data-efficiency and generalization are key challenges in deep learning and deep reinforcement learning as many models are trained on large-scale, domain-specific, and expensive-to-label datasets. Self-supervised models trained on large-scale uncurated datasets have shown successful transfer to diverse settings. We investigate using pretrained image representations and spatio-temporal attention for state representation learning in Atari. We also explore fine-tuning pretrained representations with self-supervised techniques, i.e., contrastive predictive coding, spatio-temporal contrastive learning, and augmentations. Our results show that pretrained representations are at par with state-of-the-art self-supervised methods trained on domain-specific data. Pretrained representations, thus, yield data and compute-efficient state representations. https://github.com/PAL-ML/PEARL_v1 <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.05139v1-abstract-full').style.display = 'none'; document.getElementById('2106.05139v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.04615">arXiv:2106.04615</a> <span> [<a href="https://arxiv.org/pdf/2106.04615">pdf</a>, <a href="https://arxiv.org/format/2106.04615">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Vector Quantized Models for Planning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ozair%2C+S">Sherjil Ozair</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yazhe Li</a>, <a href="/search/cs?searchtype=author&query=Razavi%2C+A">Ali Razavi</a>, <a href="/search/cs?searchtype=author&query=Antonoglou%2C+I">Ioannis Antonoglou</a>, <a href="/search/cs?searchtype=author&query=Oord%2C+A+v+d">A盲ron van den Oord</a>, <a href="/search/cs?searchtype=author&query=Vinyals%2C+O">Oriol Vinyals</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.04615v2-abstract-short" style="display: inline;"> Recent developments in the field of model-based RL have proven successful in a range of environments, especially ones where planning is essential. However, such successes have been limited to deterministic fully-observed environments. We present a new approach that handles stochastic and partially-observable environments. Our key insight is to use discrete autoencoders to capture the multiple poss… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.04615v2-abstract-full').style.display = 'inline'; document.getElementById('2106.04615v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.04615v2-abstract-full" style="display: none;"> Recent developments in the field of model-based RL have proven successful in a range of environments, especially ones where planning is essential. However, such successes have been limited to deterministic fully-observed environments. We present a new approach that handles stochastic and partially-observable environments. Our key insight is to use discrete autoencoders to capture the multiple possible effects of an action in a stochastic environment. We use a stochastic variant of Monte Carlo tree search to plan over both the agent's actions and the discrete latent variables representing the environment's response. Our approach significantly outperforms an offline version of MuZero on a stochastic interpretation of chess where the opponent is considered part of the environment. We also show that our approach scales to DeepMind Lab, a first-person 3D environment with large visual observations and partial observability. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.04615v2-abstract-full').style.display = 'none'; document.getElementById('2106.04615v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICML 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1912.11570">arXiv:1912.11570</a> <span> [<a href="https://arxiv.org/pdf/1912.11570">pdf</a>, <a href="https://arxiv.org/format/1912.11570">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> SketchTransfer: A Challenging New Task for Exploring Detail-Invariance and the Abstractions Learned by Deep Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lamb%2C+A">Alex Lamb</a>, <a href="/search/cs?searchtype=author&query=Ozair%2C+S">Sherjil Ozair</a>, <a href="/search/cs?searchtype=author&query=Verma%2C+V">Vikas Verma</a>, <a href="/search/cs?searchtype=author&query=Ha%2C+D">David Ha</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1912.11570v1-abstract-short" style="display: inline;"> Deep networks have achieved excellent results in perceptual tasks, yet their ability to generalize to variations not seen during training has come under increasing scrutiny. In this work we focus on their ability to have invariance towards the presence or absence of details. For example, humans are able to watch cartoons, which are missing many visual details, without being explicitly trained to d… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1912.11570v1-abstract-full').style.display = 'inline'; document.getElementById('1912.11570v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1912.11570v1-abstract-full" style="display: none;"> Deep networks have achieved excellent results in perceptual tasks, yet their ability to generalize to variations not seen during training has come under increasing scrutiny. In this work we focus on their ability to have invariance towards the presence or absence of details. For example, humans are able to watch cartoons, which are missing many visual details, without being explicitly trained to do so. As another example, 3D rendering software is a relatively recent development, yet people are able to understand such rendered scenes even though they are missing details (consider a film like Toy Story). The failure of machine learning algorithms to do this indicates a significant gap in generalization between human abilities and the abilities of deep networks. We propose a dataset that will make it easier to study the detail-invariance problem concretely. We produce a concrete task for this: SketchTransfer, and we show that state-of-the-art domain transfer algorithms still struggle with this task. The state-of-the-art technique which achieves over 95\% on MNIST $\xrightarrow{}$ SVHN transfer only achieves 59\% accuracy on the SketchTransfer task, which is much better than random (11\% accuracy) but falls short of the 87\% accuracy of a classifier trained directly on labeled sketches. This indicates that this task is approachable with today's best methods but has substantial room for improvement. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1912.11570v1-abstract-full').style.display = 'none'; document.getElementById('1912.11570v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 December, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted WACV 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1906.08226">arXiv:1906.08226</a> <span> [<a href="https://arxiv.org/pdf/1906.08226">pdf</a>, <a href="https://arxiv.org/format/1906.08226">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Unsupervised State Representation Learning in Atari </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Anand%2C+A">Ankesh Anand</a>, <a href="/search/cs?searchtype=author&query=Racah%2C+E">Evan Racah</a>, <a href="/search/cs?searchtype=author&query=Ozair%2C+S">Sherjil Ozair</a>, <a href="/search/cs?searchtype=author&query=Bengio%2C+Y">Yoshua Bengio</a>, <a href="/search/cs?searchtype=author&query=C%C3%B4t%C3%A9%2C+M">Marc-Alexandre C么t茅</a>, <a href="/search/cs?searchtype=author&query=Hjelm%2C+R+D">R Devon Hjelm</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1906.08226v6-abstract-short" style="display: inline;"> State representation learning, or the ability to capture latent generative factors of an environment, is crucial for building intelligent agents that can perform a wide variety of tasks. Learning such representations without supervision from rewards is a challenging open problem. We introduce a method that learns state representations by maximizing mutual information across spatially and temporall… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1906.08226v6-abstract-full').style.display = 'inline'; document.getElementById('1906.08226v6-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1906.08226v6-abstract-full" style="display: none;"> State representation learning, or the ability to capture latent generative factors of an environment, is crucial for building intelligent agents that can perform a wide variety of tasks. Learning such representations without supervision from rewards is a challenging open problem. We introduce a method that learns state representations by maximizing mutual information across spatially and temporally distinct features of a neural encoder of the observations. We also introduce a new benchmark based on Atari 2600 games where we evaluate representations based on how well they capture the ground truth state variables. We believe this new framework for evaluating representation learning models will be crucial for future representation learning research. Finally, we compare our technique with other state-of-the-art generative and contrastive representation learning methods. The code associated with this work is available at https://github.com/mila-iqia/atari-representation-learning <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1906.08226v6-abstract-full').style.display = 'none'; document.getElementById('1906.08226v6-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 June, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2019; v6 fixes a broken figure reference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1905.09334">arXiv:1905.09334</a> <span> [<a href="https://arxiv.org/pdf/1905.09334">pdf</a>, <a href="https://arxiv.org/format/1905.09334">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> The Journey is the Reward: Unsupervised Learning of Influential Trajectories </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Binas%2C+J">Jonathan Binas</a>, <a href="/search/cs?searchtype=author&query=Ozair%2C+S">Sherjil Ozair</a>, <a href="/search/cs?searchtype=author&query=Bengio%2C+Y">Yoshua Bengio</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1905.09334v1-abstract-short" style="display: inline;"> Unsupervised exploration and representation learning become increasingly important when learning in diverse and sparse environments. The information-theoretic principle of empowerment formalizes an unsupervised exploration objective through an agent trying to maximize its influence on the future states of its environment. Previous approaches carry certain limitations in that they either do not emp… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1905.09334v1-abstract-full').style.display = 'inline'; document.getElementById('1905.09334v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1905.09334v1-abstract-full" style="display: none;"> Unsupervised exploration and representation learning become increasingly important when learning in diverse and sparse environments. The information-theoretic principle of empowerment formalizes an unsupervised exploration objective through an agent trying to maximize its influence on the future states of its environment. Previous approaches carry certain limitations in that they either do not employ closed-loop feedback or do not have an internal state. As a consequence, a privileged final state is taken as an influence measure, rather than the full trajectory. We provide a model-free method which takes into account the whole trajectory while still offering the benefits of option-based approaches. We successfully apply our approach to settings with large action spaces, where discovery of meaningful action sequences is particularly difficult. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1905.09334v1-abstract-full').style.display = 'none'; document.getElementById('1905.09334v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 May, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICML'19 ERL Workshop</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1905.06922">arXiv:1905.06922</a> <span> [<a href="https://arxiv.org/pdf/1905.06922">pdf</a>, <a href="https://arxiv.org/format/1905.06922">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> On Variational Bounds of Mutual Information </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Poole%2C+B">Ben Poole</a>, <a href="/search/cs?searchtype=author&query=Ozair%2C+S">Sherjil Ozair</a>, <a href="/search/cs?searchtype=author&query=Oord%2C+A+v+d">Aaron van den Oord</a>, <a href="/search/cs?searchtype=author&query=Alemi%2C+A+A">Alexander A. Alemi</a>, <a href="/search/cs?searchtype=author&query=Tucker%2C+G">George Tucker</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1905.06922v1-abstract-short" style="display: inline;"> Estimating and optimizing Mutual Information (MI) is core to many problems in machine learning; however, bounding MI in high dimensions is challenging. To establish tractable and scalable objectives, recent work has turned to variational bounds parameterized by neural networks, but the relationships and tradeoffs between these bounds remains unclear. In this work, we unify these recent development… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1905.06922v1-abstract-full').style.display = 'inline'; document.getElementById('1905.06922v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1905.06922v1-abstract-full" style="display: none;"> Estimating and optimizing Mutual Information (MI) is core to many problems in machine learning; however, bounding MI in high dimensions is challenging. To establish tractable and scalable objectives, recent work has turned to variational bounds parameterized by neural networks, but the relationships and tradeoffs between these bounds remains unclear. In this work, we unify these recent developments in a single framework. We find that the existing variational lower bounds degrade when the MI is large, exhibiting either high bias or high variance. To address this problem, we introduce a continuum of lower bounds that encompasses previous bounds and flexibly trades off bias and variance. On high-dimensional, controlled problems, we empirically characterize the bias and variance of the bounds and their gradients and demonstrate the effectiveness of our new bounds for estimation and representation learning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1905.06922v1-abstract-full').style.display = 'none'; document.getElementById('1905.06922v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 May, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICML 2019</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1903.11780">arXiv:1903.11780</a> <span> [<a href="https://arxiv.org/pdf/1903.11780">pdf</a>, <a href="https://arxiv.org/format/1903.11780">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Wasserstein Dependency Measure for Representation Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ozair%2C+S">Sherjil Ozair</a>, <a href="/search/cs?searchtype=author&query=Lynch%2C+C">Corey Lynch</a>, <a href="/search/cs?searchtype=author&query=Bengio%2C+Y">Yoshua Bengio</a>, <a href="/search/cs?searchtype=author&query=Oord%2C+A+v+d">Aaron van den Oord</a>, <a href="/search/cs?searchtype=author&query=Levine%2C+S">Sergey Levine</a>, <a href="/search/cs?searchtype=author&query=Sermanet%2C+P">Pierre Sermanet</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1903.11780v1-abstract-short" style="display: inline;"> Mutual information maximization has emerged as a powerful learning objective for unsupervised representation learning obtaining state-of-the-art performance in applications such as object recognition, speech recognition, and reinforcement learning. However, such approaches are fundamentally limited since a tight lower bound of mutual information requires sample size exponential in the mutual infor… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1903.11780v1-abstract-full').style.display = 'inline'; document.getElementById('1903.11780v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1903.11780v1-abstract-full" style="display: none;"> Mutual information maximization has emerged as a powerful learning objective for unsupervised representation learning obtaining state-of-the-art performance in applications such as object recognition, speech recognition, and reinforcement learning. However, such approaches are fundamentally limited since a tight lower bound of mutual information requires sample size exponential in the mutual information. This limits the applicability of these approaches for prediction tasks with high mutual information, such as in video understanding or reinforcement learning. In these settings, such techniques are prone to overfit, both in theory and in practice, and capture only a few of the relevant factors of variation. This leads to incomplete representations that are not optimal for downstream tasks. In this work, we empirically demonstrate that mutual information-based representation learning approaches do fail to learn complete representations on a number of designed and real-world tasks. To mitigate these problems we introduce the Wasserstein dependency measure, which learns more complete representations by using the Wasserstein distance instead of the KL divergence in the mutual information estimator. We show that a practical approximation to this theoretically motivated solution, constructed using Lipschitz constraint techniques from the GAN literature, achieves substantially improved results on tasks where incomplete representations are a major challenge. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1903.11780v1-abstract-full').style.display = 'none'; document.getElementById('1903.11780v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 March, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1901.08508">arXiv:1901.08508</a> <span> [<a href="https://arxiv.org/pdf/1901.08508">pdf</a>, <a href="https://arxiv.org/format/1901.08508">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Maximum Entropy Generators for Energy-Based Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kumar%2C+R">Rithesh Kumar</a>, <a href="/search/cs?searchtype=author&query=Ozair%2C+S">Sherjil Ozair</a>, <a href="/search/cs?searchtype=author&query=Goyal%2C+A">Anirudh Goyal</a>, <a href="/search/cs?searchtype=author&query=Courville%2C+A">Aaron Courville</a>, <a href="/search/cs?searchtype=author&query=Bengio%2C+Y">Yoshua Bengio</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1901.08508v2-abstract-short" style="display: inline;"> Maximum likelihood estimation of energy-based models is a challenging problem due to the intractability of the log-likelihood gradient. In this work, we propose learning both the energy function and an amortized approximate sampling mechanism using a neural generator network, which provides an efficient approximation of the log-likelihood gradient. The resulting objective requires maximizing entro… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1901.08508v2-abstract-full').style.display = 'inline'; document.getElementById('1901.08508v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1901.08508v2-abstract-full" style="display: none;"> Maximum likelihood estimation of energy-based models is a challenging problem due to the intractability of the log-likelihood gradient. In this work, we propose learning both the energy function and an amortized approximate sampling mechanism using a neural generator network, which provides an efficient approximation of the log-likelihood gradient. The resulting objective requires maximizing entropy of the generated samples, which we perform using recently proposed nonparametric mutual information estimators. Finally, to stabilize the resulting adversarial game, we use a zero-centered gradient penalty derived as a necessary condition from the score matching literature. The proposed technique can generate sharp images with Inception and FID scores competitive with recent GAN techniques, does not suffer from mode collapse, and is competitive with state-of-the-art anomaly detection techniques. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1901.08508v2-abstract-full').style.display = 'none'; document.getElementById('1901.08508v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 May, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 January, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1801.04062">arXiv:1801.04062</a> <span> [<a href="https://arxiv.org/pdf/1801.04062">pdf</a>, <a href="https://arxiv.org/format/1801.04062">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> MINE: Mutual Information Neural Estimation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Belghazi%2C+M+I">Mohamed Ishmael Belghazi</a>, <a href="/search/cs?searchtype=author&query=Baratin%2C+A">Aristide Baratin</a>, <a href="/search/cs?searchtype=author&query=Rajeswar%2C+S">Sai Rajeswar</a>, <a href="/search/cs?searchtype=author&query=Ozair%2C+S">Sherjil Ozair</a>, <a href="/search/cs?searchtype=author&query=Bengio%2C+Y">Yoshua Bengio</a>, <a href="/search/cs?searchtype=author&query=Courville%2C+A">Aaron Courville</a>, <a href="/search/cs?searchtype=author&query=Hjelm%2C+R+D">R Devon Hjelm</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1801.04062v5-abstract-short" style="display: inline;"> We argue that the estimation of mutual information between high dimensional continuous random variables can be achieved by gradient descent over neural networks. We present a Mutual Information Neural Estimator (MINE) that is linearly scalable in dimensionality as well as in sample size, trainable through back-prop, and strongly consistent. We present a handful of applications on which MINE can be… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1801.04062v5-abstract-full').style.display = 'inline'; document.getElementById('1801.04062v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1801.04062v5-abstract-full" style="display: none;"> We argue that the estimation of mutual information between high dimensional continuous random variables can be achieved by gradient descent over neural networks. We present a Mutual Information Neural Estimator (MINE) that is linearly scalable in dimensionality as well as in sample size, trainable through back-prop, and strongly consistent. We present a handful of applications on which MINE can be used to minimize or maximize mutual information. We apply MINE to improve adversarially trained generative models. We also use MINE to implement Information Bottleneck, applying it to supervised classification; our results demonstrate substantial improvement in flexibility and performance in these settings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1801.04062v5-abstract-full').style.display = 'none'; document.getElementById('1801.04062v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 August, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 January, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages, 6 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> ICML 2018 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1512.02595">arXiv:1512.02595</a> <span> [<a href="https://arxiv.org/pdf/1512.02595">pdf</a>, <a href="https://arxiv.org/format/1512.02595">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Deep Speech 2: End-to-End Speech Recognition in English and Mandarin </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Amodei%2C+D">Dario Amodei</a>, <a href="/search/cs?searchtype=author&query=Anubhai%2C+R">Rishita Anubhai</a>, <a href="/search/cs?searchtype=author&query=Battenberg%2C+E">Eric Battenberg</a>, <a href="/search/cs?searchtype=author&query=Case%2C+C">Carl Case</a>, <a href="/search/cs?searchtype=author&query=Casper%2C+J">Jared Casper</a>, <a href="/search/cs?searchtype=author&query=Catanzaro%2C+B">Bryan Catanzaro</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jingdong Chen</a>, <a href="/search/cs?searchtype=author&query=Chrzanowski%2C+M">Mike Chrzanowski</a>, <a href="/search/cs?searchtype=author&query=Coates%2C+A">Adam Coates</a>, <a href="/search/cs?searchtype=author&query=Diamos%2C+G">Greg Diamos</a>, <a href="/search/cs?searchtype=author&query=Elsen%2C+E">Erich Elsen</a>, <a href="/search/cs?searchtype=author&query=Engel%2C+J">Jesse Engel</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+L">Linxi Fan</a>, <a href="/search/cs?searchtype=author&query=Fougner%2C+C">Christopher Fougner</a>, <a href="/search/cs?searchtype=author&query=Han%2C+T">Tony Han</a>, <a href="/search/cs?searchtype=author&query=Hannun%2C+A">Awni Hannun</a>, <a href="/search/cs?searchtype=author&query=Jun%2C+B">Billy Jun</a>, <a href="/search/cs?searchtype=author&query=LeGresley%2C+P">Patrick LeGresley</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+L">Libby Lin</a>, <a href="/search/cs?searchtype=author&query=Narang%2C+S">Sharan Narang</a>, <a href="/search/cs?searchtype=author&query=Ng%2C+A">Andrew Ng</a>, <a href="/search/cs?searchtype=author&query=Ozair%2C+S">Sherjil Ozair</a>, <a href="/search/cs?searchtype=author&query=Prenger%2C+R">Ryan Prenger</a>, <a href="/search/cs?searchtype=author&query=Raiman%2C+J">Jonathan Raiman</a>, <a href="/search/cs?searchtype=author&query=Satheesh%2C+S">Sanjeev Satheesh</a> , et al. (9 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1512.02595v1-abstract-short" style="display: inline;"> We show that an end-to-end deep learning approach can be used to recognize either English or Mandarin Chinese speech--two vastly different languages. Because it replaces entire pipelines of hand-engineered components with neural networks, end-to-end learning allows us to handle a diverse variety of speech including noisy environments, accents and different languages. Key to our approach is our app… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1512.02595v1-abstract-full').style.display = 'inline'; document.getElementById('1512.02595v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1512.02595v1-abstract-full" style="display: none;"> We show that an end-to-end deep learning approach can be used to recognize either English or Mandarin Chinese speech--two vastly different languages. Because it replaces entire pipelines of hand-engineered components with neural networks, end-to-end learning allows us to handle a diverse variety of speech including noisy environments, accents and different languages. Key to our approach is our application of HPC techniques, resulting in a 7x speedup over our previous system. Because of this efficiency, experiments that previously took weeks now run in days. This enables us to iterate more quickly to identify superior architectures and algorithms. As a result, in several cases, our system is competitive with the transcription of human workers when benchmarked on standard datasets. Finally, using a technique called Batch Dispatch with GPUs in the data center, we show that our system can be inexpensively deployed in an online setting, delivering low latency when serving users at scale. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1512.02595v1-abstract-full').style.display = 'none'; document.getElementById('1512.02595v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 December, 2015; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2015. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1410.0630">arXiv:1410.0630</a> <span> [<a href="https://arxiv.org/pdf/1410.0630">pdf</a>, <a href="https://arxiv.org/format/1410.0630">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> </div> <p class="title is-5 mathjax"> Deep Directed Generative Autoencoders </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ozair%2C+S">Sherjil Ozair</a>, <a href="/search/cs?searchtype=author&query=Bengio%2C+Y">Yoshua Bengio</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1410.0630v1-abstract-short" style="display: inline;"> For discrete data, the likelihood $P(x)$ can be rewritten exactly and parametrized into $P(X = x) = P(X = x | H = f(x)) P(H = f(x))$ if $P(X | H)$ has enough capacity to put no probability mass on any $x'$ for which $f(x')\neq f(x)$, where $f(\cdot)$ is a deterministic discrete function. The log of the first factor gives rise to the log-likelihood reconstruction error of an autoencoder with… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1410.0630v1-abstract-full').style.display = 'inline'; document.getElementById('1410.0630v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1410.0630v1-abstract-full" style="display: none;"> For discrete data, the likelihood $P(x)$ can be rewritten exactly and parametrized into $P(X = x) = P(X = x | H = f(x)) P(H = f(x))$ if $P(X | H)$ has enough capacity to put no probability mass on any $x'$ for which $f(x')\neq f(x)$, where $f(\cdot)$ is a deterministic discrete function. The log of the first factor gives rise to the log-likelihood reconstruction error of an autoencoder with $f(\cdot)$ as the encoder and $P(X|H)$ as the (probabilistic) decoder. The log of the second term can be seen as a regularizer on the encoded activations $h=f(x)$, e.g., as in sparse autoencoders. Both encoder and decoder can be represented by a deep neural network and trained to maximize the average of the optimal log-likelihood $\log p(x)$. The objective is to learn an encoder $f(\cdot)$ that maps $X$ to $f(X)$ that has a much simpler distribution than $X$ itself, estimated by $P(H)$. This "flattens the manifold" or concentrates probability mass in a smaller number of (relevant) dimensions over which the distribution factorizes. Generating samples from the model is straightforward using ancestral sampling. One challenge is that regular back-propagation cannot be used to obtain the gradient on the parameters of the encoder, but we find that using the straight-through estimator works well here. We also find that although optimizing a single level of such architecture may be difficult, much better results can be obtained by pre-training and stacking them, gradually transforming the data distribution into one that is more easily captured by a simple parametric model. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1410.0630v1-abstract-full').style.display = 'none'; document.getElementById('1410.0630v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2014; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2014. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1409.0585">arXiv:1409.0585</a> <span> [<a href="https://arxiv.org/pdf/1409.0585">pdf</a>, <a href="https://arxiv.org/format/1409.0585">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> On the Equivalence Between Deep NADE and Generative Stochastic Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yao%2C+L">Li Yao</a>, <a href="/search/cs?searchtype=author&query=Ozair%2C+S">Sherjil Ozair</a>, <a href="/search/cs?searchtype=author&query=Cho%2C+K">Kyunghyun Cho</a>, <a href="/search/cs?searchtype=author&query=Bengio%2C+Y">Yoshua Bengio</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1409.0585v1-abstract-short" style="display: inline;"> Neural Autoregressive Distribution Estimators (NADEs) have recently been shown as successful alternatives for modeling high dimensional multimodal distributions. One issue associated with NADEs is that they rely on a particular order of factorization for $P(\mathbf{x})$. This issue has been recently addressed by a variant of NADE called Orderless NADEs and its deeper version, Deep Orderless NADE.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1409.0585v1-abstract-full').style.display = 'inline'; document.getElementById('1409.0585v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1409.0585v1-abstract-full" style="display: none;"> Neural Autoregressive Distribution Estimators (NADEs) have recently been shown as successful alternatives for modeling high dimensional multimodal distributions. One issue associated with NADEs is that they rely on a particular order of factorization for $P(\mathbf{x})$. This issue has been recently addressed by a variant of NADE called Orderless NADEs and its deeper version, Deep Orderless NADE. Orderless NADEs are trained based on a criterion that stochastically maximizes $P(\mathbf{x})$ with all possible orders of factorizations. Unfortunately, ancestral sampling from deep NADE is very expensive, corresponding to running through a neural net separately predicting each of the visible variables given some others. This work makes a connection between this criterion and the training criterion for Generative Stochastic Networks (GSNs). It shows that training NADEs in this way also trains a GSN, which defines a Markov chain associated with the NADE model. Based on this connection, we show an alternative way to sample from a trained Orderless NADE that allows to trade-off computing time and quality of the samples: a 3 to 10-fold speedup (taking into account the waste due to correlations between consecutive samples of the chain) can be obtained without noticeably reducing the quality of the samples. This is achieved using a novel sampling procedure for GSNs called annealed GSN sampling, similar to tempering methods that combines fast mixing (obtained thanks to steps at high noise levels) with accurate samples (obtained thanks to steps at low noise levels). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1409.0585v1-abstract-full').style.display = 'none'; document.getElementById('1409.0585v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 September, 2014; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2014. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ECML/PKDD 2014</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1406.2661">arXiv:1406.2661</a> <span> [<a href="https://arxiv.org/pdf/1406.2661">pdf</a>, <a href="https://arxiv.org/format/1406.2661">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Generative Adversarial Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Goodfellow%2C+I+J">Ian J. Goodfellow</a>, <a href="/search/cs?searchtype=author&query=Pouget-Abadie%2C+J">Jean Pouget-Abadie</a>, <a href="/search/cs?searchtype=author&query=Mirza%2C+M">Mehdi Mirza</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+B">Bing Xu</a>, <a href="/search/cs?searchtype=author&query=Warde-Farley%2C+D">David Warde-Farley</a>, <a href="/search/cs?searchtype=author&query=Ozair%2C+S">Sherjil Ozair</a>, <a href="/search/cs?searchtype=author&query=Courville%2C+A">Aaron Courville</a>, <a href="/search/cs?searchtype=author&query=Bengio%2C+Y">Yoshua Bengio</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1406.2661v1-abstract-short" style="display: inline;"> We propose a new framework for estimating generative models via an adversarial process, in which we simultaneously train two models: a generative model G that captures the data distribution, and a discriminative model D that estimates the probability that a sample came from the training data rather than G. The training procedure for G is to maximize the probability of D making a mistake. This fram… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1406.2661v1-abstract-full').style.display = 'inline'; document.getElementById('1406.2661v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1406.2661v1-abstract-full" style="display: none;"> We propose a new framework for estimating generative models via an adversarial process, in which we simultaneously train two models: a generative model G that captures the data distribution, and a discriminative model D that estimates the probability that a sample came from the training data rather than G. The training procedure for G is to maximize the probability of D making a mistake. This framework corresponds to a minimax two-player game. In the space of arbitrary functions G and D, a unique solution exists, with G recovering the training data distribution and D equal to 1/2 everywhere. In the case where G and D are defined by multilayer perceptrons, the entire system can be trained with backpropagation. There is no need for any Markov chains or unrolled approximate inference networks during either training or generation of samples. Experiments demonstrate the potential of the framework through qualitative and quantitative evaluation of the generated samples. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1406.2661v1-abstract-full').style.display = 'none'; document.getElementById('1406.2661v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 June, 2014; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2014. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1312.5578">arXiv:1312.5578</a> <span> [<a href="https://arxiv.org/pdf/1312.5578">pdf</a>, <a href="https://arxiv.org/format/1312.5578">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Multimodal Transitions for Generative Stochastic Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ozair%2C+S">Sherjil Ozair</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+L">Li Yao</a>, <a href="/search/cs?searchtype=author&query=Bengio%2C+Y">Yoshua Bengio</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1312.5578v4-abstract-short" style="display: inline;"> Generative Stochastic Networks (GSNs) have been recently introduced as an alternative to traditional probabilistic modeling: instead of parametrizing the data distribution directly, one parametrizes a transition operator for a Markov chain whose stationary distribution is an estimator of the data generating distribution. The result of training is therefore a machine that generates samples through… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1312.5578v4-abstract-full').style.display = 'inline'; document.getElementById('1312.5578v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1312.5578v4-abstract-full" style="display: none;"> Generative Stochastic Networks (GSNs) have been recently introduced as an alternative to traditional probabilistic modeling: instead of parametrizing the data distribution directly, one parametrizes a transition operator for a Markov chain whose stationary distribution is an estimator of the data generating distribution. The result of training is therefore a machine that generates samples through this Markov chain. However, the previously introduced GSN consistency theorems suggest that in order to capture a wide class of distributions, the transition operator in general should be multimodal, something that has not been done before this paper. We introduce for the first time multimodal transition distributions for GSNs, in particular using models in the NADE family (Neural Autoregressive Density Estimator) as output distributions of the transition operator. A NADE model is related to an RBM (and can thus model multimodal distributions) but its likelihood (and likelihood gradient) can be computed easily. The parameters of the NADE are obtained as a learned function of the previous state of the learned Markov chain. Experiments clearly illustrate the advantage of such multimodal transition distributions over unimodal GSNs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1312.5578v4-abstract-full').style.display = 'none'; document.getElementById('1312.5578v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 January, 2014; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 December, 2013; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2013. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 figures, 9 pages, submitted to ICLR14</span> </p> </li> </ol> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository