Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–10 of 10 results for author: <span class="mathjax">Isard, M</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Isard%2C+M">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Isard, M"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Isard%2C+M&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Isard, M"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.05530">arXiv:2403.05530</a> <span> [<a href="https://arxiv.org/pdf/2403.05530">pdf</a>, <a href="https://arxiv.org/format/2403.05530">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Gemini 1.5: Unlocking multimodal understanding across millions of tokens of context </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gemini+Team"> Gemini Team</a>, <a href="/search/cs?searchtype=author&query=Georgiev%2C+P">Petko Georgiev</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+V+I">Ving Ian Lei</a>, <a href="/search/cs?searchtype=author&query=Burnell%2C+R">Ryan Burnell</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+L">Libin Bai</a>, <a href="/search/cs?searchtype=author&query=Gulati%2C+A">Anmol Gulati</a>, <a href="/search/cs?searchtype=author&query=Tanzer%2C+G">Garrett Tanzer</a>, <a href="/search/cs?searchtype=author&query=Vincent%2C+D">Damien Vincent</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+Z">Zhufeng Pan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shibo Wang</a>, <a href="/search/cs?searchtype=author&query=Mariooryad%2C+S">Soroosh Mariooryad</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+Y">Yifan Ding</a>, <a href="/search/cs?searchtype=author&query=Geng%2C+X">Xinyang Geng</a>, <a href="/search/cs?searchtype=author&query=Alcober%2C+F">Fred Alcober</a>, <a href="/search/cs?searchtype=author&query=Frostig%2C+R">Roy Frostig</a>, <a href="/search/cs?searchtype=author&query=Omernick%2C+M">Mark Omernick</a>, <a href="/search/cs?searchtype=author&query=Walker%2C+L">Lexi Walker</a>, <a href="/search/cs?searchtype=author&query=Paduraru%2C+C">Cosmin Paduraru</a>, <a href="/search/cs?searchtype=author&query=Sorokin%2C+C">Christina Sorokin</a>, <a href="/search/cs?searchtype=author&query=Tacchetti%2C+A">Andrea Tacchetti</a>, <a href="/search/cs?searchtype=author&query=Gaffney%2C+C">Colin Gaffney</a>, <a href="/search/cs?searchtype=author&query=Daruki%2C+S">Samira Daruki</a>, <a href="/search/cs?searchtype=author&query=Sercinoglu%2C+O">Olcan Sercinoglu</a>, <a href="/search/cs?searchtype=author&query=Gleicher%2C+Z">Zach Gleicher</a>, <a href="/search/cs?searchtype=author&query=Love%2C+J">Juliette Love</a> , et al. (1110 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.05530v4-abstract-short" style="display: inline;"> In this report, we introduce the Gemini 1.5 family of models, representing the next generation of highly compute-efficient multimodal models capable of recalling and reasoning over fine-grained information from millions of tokens of context, including multiple long documents and hours of video and audio. The family includes two new models: (1) an updated Gemini 1.5 Pro, which exceeds the February… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.05530v4-abstract-full').style.display = 'inline'; document.getElementById('2403.05530v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.05530v4-abstract-full" style="display: none;"> In this report, we introduce the Gemini 1.5 family of models, representing the next generation of highly compute-efficient multimodal models capable of recalling and reasoning over fine-grained information from millions of tokens of context, including multiple long documents and hours of video and audio. The family includes two new models: (1) an updated Gemini 1.5 Pro, which exceeds the February version on the great majority of capabilities and benchmarks; (2) Gemini 1.5 Flash, a more lightweight variant designed for efficiency with minimal regression in quality. Gemini 1.5 models achieve near-perfect recall on long-context retrieval tasks across modalities, improve the state-of-the-art in long-document QA, long-video QA and long-context ASR, and match or surpass Gemini 1.0 Ultra's state-of-the-art performance across a broad set of benchmarks. Studying the limits of Gemini 1.5's long-context ability, we find continued improvement in next-token prediction and near-perfect retrieval (>99%) up to at least 10M tokens, a generational leap over existing models such as Claude 3.0 (200k) and GPT-4 Turbo (128k). Finally, we highlight real-world use cases, such as Gemini 1.5 collaborating with professionals on completing their tasks achieving 26 to 75% time savings across 10 different job categories, as well as surprising new capabilities of large language models at the frontier; when given a grammar manual for Kalamang, a language with fewer than 200 speakers worldwide, the model learns to translate English to Kalamang at a similar level to a person who learned from the same content. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.05530v4-abstract-full').style.display = 'none'; document.getElementById('2403.05530v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.11805">arXiv:2312.11805</a> <span> [<a href="https://arxiv.org/pdf/2312.11805">pdf</a>, <a href="https://arxiv.org/format/2312.11805">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Gemini: A Family of Highly Capable Multimodal Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gemini+Team"> Gemini Team</a>, <a href="/search/cs?searchtype=author&query=Anil%2C+R">Rohan Anil</a>, <a href="/search/cs?searchtype=author&query=Borgeaud%2C+S">Sebastian Borgeaud</a>, <a href="/search/cs?searchtype=author&query=Alayrac%2C+J">Jean-Baptiste Alayrac</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+J">Jiahui Yu</a>, <a href="/search/cs?searchtype=author&query=Soricut%2C+R">Radu Soricut</a>, <a href="/search/cs?searchtype=author&query=Schalkwyk%2C+J">Johan Schalkwyk</a>, <a href="/search/cs?searchtype=author&query=Dai%2C+A+M">Andrew M. Dai</a>, <a href="/search/cs?searchtype=author&query=Hauth%2C+A">Anja Hauth</a>, <a href="/search/cs?searchtype=author&query=Millican%2C+K">Katie Millican</a>, <a href="/search/cs?searchtype=author&query=Silver%2C+D">David Silver</a>, <a href="/search/cs?searchtype=author&query=Johnson%2C+M">Melvin Johnson</a>, <a href="/search/cs?searchtype=author&query=Antonoglou%2C+I">Ioannis Antonoglou</a>, <a href="/search/cs?searchtype=author&query=Schrittwieser%2C+J">Julian Schrittwieser</a>, <a href="/search/cs?searchtype=author&query=Glaese%2C+A">Amelia Glaese</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jilin Chen</a>, <a href="/search/cs?searchtype=author&query=Pitler%2C+E">Emily Pitler</a>, <a href="/search/cs?searchtype=author&query=Lillicrap%2C+T">Timothy Lillicrap</a>, <a href="/search/cs?searchtype=author&query=Lazaridou%2C+A">Angeliki Lazaridou</a>, <a href="/search/cs?searchtype=author&query=Firat%2C+O">Orhan Firat</a>, <a href="/search/cs?searchtype=author&query=Molloy%2C+J">James Molloy</a>, <a href="/search/cs?searchtype=author&query=Isard%2C+M">Michael Isard</a>, <a href="/search/cs?searchtype=author&query=Barham%2C+P+R">Paul R. Barham</a>, <a href="/search/cs?searchtype=author&query=Hennigan%2C+T">Tom Hennigan</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+B">Benjamin Lee</a> , et al. (1325 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.11805v4-abstract-short" style="display: inline;"> This report introduces a new family of multimodal models, Gemini, that exhibit remarkable capabilities across image, audio, video, and text understanding. The Gemini family consists of Ultra, Pro, and Nano sizes, suitable for applications ranging from complex reasoning tasks to on-device memory-constrained use-cases. Evaluation on a broad range of benchmarks shows that our most-capable Gemini Ultr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.11805v4-abstract-full').style.display = 'inline'; document.getElementById('2312.11805v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.11805v4-abstract-full" style="display: none;"> This report introduces a new family of multimodal models, Gemini, that exhibit remarkable capabilities across image, audio, video, and text understanding. The Gemini family consists of Ultra, Pro, and Nano sizes, suitable for applications ranging from complex reasoning tasks to on-device memory-constrained use-cases. Evaluation on a broad range of benchmarks shows that our most-capable Gemini Ultra model advances the state of the art in 30 of 32 of these benchmarks - notably being the first model to achieve human-expert performance on the well-studied exam benchmark MMLU, and improving the state of the art in every one of the 20 multimodal benchmarks we examined. We believe that the new capabilities of the Gemini family in cross-modal reasoning and language understanding will enable a wide variety of use cases. We discuss our approach toward post-training and deploying Gemini models responsibly to users through services including Gemini, Gemini Advanced, Google AI Studio, and Cloud Vertex AI. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.11805v4-abstract-full').style.display = 'none'; document.getElementById('2312.11805v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.10403">arXiv:2305.10403</a> <span> [<a href="https://arxiv.org/pdf/2305.10403">pdf</a>, <a href="https://arxiv.org/format/2305.10403">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> PaLM 2 Technical Report </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Anil%2C+R">Rohan Anil</a>, <a href="/search/cs?searchtype=author&query=Dai%2C+A+M">Andrew M. Dai</a>, <a href="/search/cs?searchtype=author&query=Firat%2C+O">Orhan Firat</a>, <a href="/search/cs?searchtype=author&query=Johnson%2C+M">Melvin Johnson</a>, <a href="/search/cs?searchtype=author&query=Lepikhin%2C+D">Dmitry Lepikhin</a>, <a href="/search/cs?searchtype=author&query=Passos%2C+A">Alexandre Passos</a>, <a href="/search/cs?searchtype=author&query=Shakeri%2C+S">Siamak Shakeri</a>, <a href="/search/cs?searchtype=author&query=Taropa%2C+E">Emanuel Taropa</a>, <a href="/search/cs?searchtype=author&query=Bailey%2C+P">Paige Bailey</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhifeng Chen</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+E">Eric Chu</a>, <a href="/search/cs?searchtype=author&query=Clark%2C+J+H">Jonathan H. Clark</a>, <a href="/search/cs?searchtype=author&query=Shafey%2C+L+E">Laurent El Shafey</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yanping Huang</a>, <a href="/search/cs?searchtype=author&query=Meier-Hellstern%2C+K">Kathy Meier-Hellstern</a>, <a href="/search/cs?searchtype=author&query=Mishra%2C+G">Gaurav Mishra</a>, <a href="/search/cs?searchtype=author&query=Moreira%2C+E">Erica Moreira</a>, <a href="/search/cs?searchtype=author&query=Omernick%2C+M">Mark Omernick</a>, <a href="/search/cs?searchtype=author&query=Robinson%2C+K">Kevin Robinson</a>, <a href="/search/cs?searchtype=author&query=Ruder%2C+S">Sebastian Ruder</a>, <a href="/search/cs?searchtype=author&query=Tay%2C+Y">Yi Tay</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+K">Kefan Xiao</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yuanzhong Xu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yujing Zhang</a>, <a href="/search/cs?searchtype=author&query=Abrego%2C+G+H">Gustavo Hernandez Abrego</a> , et al. (103 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.10403v3-abstract-short" style="display: inline;"> We introduce PaLM 2, a new state-of-the-art language model that has better multilingual and reasoning capabilities and is more compute-efficient than its predecessor PaLM. PaLM 2 is a Transformer-based model trained using a mixture of objectives. Through extensive evaluations on English and multilingual language, and reasoning tasks, we demonstrate that PaLM 2 has significantly improved quality on… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.10403v3-abstract-full').style.display = 'inline'; document.getElementById('2305.10403v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.10403v3-abstract-full" style="display: none;"> We introduce PaLM 2, a new state-of-the-art language model that has better multilingual and reasoning capabilities and is more compute-efficient than its predecessor PaLM. PaLM 2 is a Transformer-based model trained using a mixture of objectives. Through extensive evaluations on English and multilingual language, and reasoning tasks, we demonstrate that PaLM 2 has significantly improved quality on downstream tasks across different model sizes, while simultaneously exhibiting faster and more efficient inference compared to PaLM. This improved efficiency enables broader deployment while also allowing the model to respond faster, for a more natural pace of interaction. PaLM 2 demonstrates robust reasoning capabilities exemplified by large improvements over PaLM on BIG-Bench and other reasoning tasks. PaLM 2 exhibits stable performance on a suite of responsible AI evaluations, and enables inference-time control over toxicity without additional overhead or impact on other capabilities. Overall, PaLM 2 achieves state-of-the-art performance across a diverse set of tasks and capabilities. When discussing the PaLM 2 family, it is important to distinguish between pre-trained models (of various sizes), fine-tuned variants of these models, and the user-facing products that use these models. In particular, user-facing products typically include additional pre- and post-processing steps. Additionally, the underlying models may evolve over time. Therefore, one should not expect the performance of user-facing products to exactly match the results reported in this report. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.10403v3-abstract-full').style.display = 'none'; document.getElementById('2305.10403v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.02311">arXiv:2204.02311</a> <span> [<a href="https://arxiv.org/pdf/2204.02311">pdf</a>, <a href="https://arxiv.org/format/2204.02311">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> PaLM: Scaling Language Modeling with Pathways </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chowdhery%2C+A">Aakanksha Chowdhery</a>, <a href="/search/cs?searchtype=author&query=Narang%2C+S">Sharan Narang</a>, <a href="/search/cs?searchtype=author&query=Devlin%2C+J">Jacob Devlin</a>, <a href="/search/cs?searchtype=author&query=Bosma%2C+M">Maarten Bosma</a>, <a href="/search/cs?searchtype=author&query=Mishra%2C+G">Gaurav Mishra</a>, <a href="/search/cs?searchtype=author&query=Roberts%2C+A">Adam Roberts</a>, <a href="/search/cs?searchtype=author&query=Barham%2C+P">Paul Barham</a>, <a href="/search/cs?searchtype=author&query=Chung%2C+H+W">Hyung Won Chung</a>, <a href="/search/cs?searchtype=author&query=Sutton%2C+C">Charles Sutton</a>, <a href="/search/cs?searchtype=author&query=Gehrmann%2C+S">Sebastian Gehrmann</a>, <a href="/search/cs?searchtype=author&query=Schuh%2C+P">Parker Schuh</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+K">Kensen Shi</a>, <a href="/search/cs?searchtype=author&query=Tsvyashchenko%2C+S">Sasha Tsvyashchenko</a>, <a href="/search/cs?searchtype=author&query=Maynez%2C+J">Joshua Maynez</a>, <a href="/search/cs?searchtype=author&query=Rao%2C+A">Abhishek Rao</a>, <a href="/search/cs?searchtype=author&query=Barnes%2C+P">Parker Barnes</a>, <a href="/search/cs?searchtype=author&query=Tay%2C+Y">Yi Tay</a>, <a href="/search/cs?searchtype=author&query=Shazeer%2C+N">Noam Shazeer</a>, <a href="/search/cs?searchtype=author&query=Prabhakaran%2C+V">Vinodkumar Prabhakaran</a>, <a href="/search/cs?searchtype=author&query=Reif%2C+E">Emily Reif</a>, <a href="/search/cs?searchtype=author&query=Du%2C+N">Nan Du</a>, <a href="/search/cs?searchtype=author&query=Hutchinson%2C+B">Ben Hutchinson</a>, <a href="/search/cs?searchtype=author&query=Pope%2C+R">Reiner Pope</a>, <a href="/search/cs?searchtype=author&query=Bradbury%2C+J">James Bradbury</a>, <a href="/search/cs?searchtype=author&query=Austin%2C+J">Jacob Austin</a> , et al. (42 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.02311v5-abstract-short" style="display: inline;"> Large language models have been shown to achieve remarkable performance across a variety of natural language tasks using few-shot learning, which drastically reduces the number of task-specific training examples needed to adapt the model to a particular application. To further our understanding of the impact of scale on few-shot learning, we trained a 540-billion parameter, densely activated, Tran… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.02311v5-abstract-full').style.display = 'inline'; document.getElementById('2204.02311v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.02311v5-abstract-full" style="display: none;"> Large language models have been shown to achieve remarkable performance across a variety of natural language tasks using few-shot learning, which drastically reduces the number of task-specific training examples needed to adapt the model to a particular application. To further our understanding of the impact of scale on few-shot learning, we trained a 540-billion parameter, densely activated, Transformer language model, which we call Pathways Language Model PaLM. We trained PaLM on 6144 TPU v4 chips using Pathways, a new ML system which enables highly efficient training across multiple TPU Pods. We demonstrate continued benefits of scaling by achieving state-of-the-art few-shot learning results on hundreds of language understanding and generation benchmarks. On a number of these tasks, PaLM 540B achieves breakthrough performance, outperforming the finetuned state-of-the-art on a suite of multi-step reasoning tasks, and outperforming average human performance on the recently released BIG-bench benchmark. A significant number of BIG-bench tasks showed discontinuous improvements from model scale, meaning that performance steeply increased as we scaled to our largest model. PaLM also has strong capabilities in multilingual tasks and source code generation, which we demonstrate on a wide array of benchmarks. We additionally provide a comprehensive analysis on bias and toxicity, and study the extent of training data memorization with respect to model scale. Finally, we discuss the ethical considerations related to large language models and discuss potential mitigation strategies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.02311v5-abstract-full').style.display = 'none'; document.getElementById('2204.02311v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.12533">arXiv:2203.12533</a> <span> [<a href="https://arxiv.org/pdf/2203.12533">pdf</a>, <a href="https://arxiv.org/format/2203.12533">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Pathways: Asynchronous Distributed Dataflow for ML </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Barham%2C+P">Paul Barham</a>, <a href="/search/cs?searchtype=author&query=Chowdhery%2C+A">Aakanksha Chowdhery</a>, <a href="/search/cs?searchtype=author&query=Dean%2C+J">Jeff Dean</a>, <a href="/search/cs?searchtype=author&query=Ghemawat%2C+S">Sanjay Ghemawat</a>, <a href="/search/cs?searchtype=author&query=Hand%2C+S">Steven Hand</a>, <a href="/search/cs?searchtype=author&query=Hurt%2C+D">Dan Hurt</a>, <a href="/search/cs?searchtype=author&query=Isard%2C+M">Michael Isard</a>, <a href="/search/cs?searchtype=author&query=Lim%2C+H">Hyeontaek Lim</a>, <a href="/search/cs?searchtype=author&query=Pang%2C+R">Ruoming Pang</a>, <a href="/search/cs?searchtype=author&query=Roy%2C+S">Sudip Roy</a>, <a href="/search/cs?searchtype=author&query=Saeta%2C+B">Brennan Saeta</a>, <a href="/search/cs?searchtype=author&query=Schuh%2C+P">Parker Schuh</a>, <a href="/search/cs?searchtype=author&query=Sepassi%2C+R">Ryan Sepassi</a>, <a href="/search/cs?searchtype=author&query=Shafey%2C+L+E">Laurent El Shafey</a>, <a href="/search/cs?searchtype=author&query=Thekkath%2C+C+A">Chandramohan A. Thekkath</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yonghui Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.12533v1-abstract-short" style="display: inline;"> We present the design of a new large scale orchestration layer for accelerators. Our system, Pathways, is explicitly designed to enable exploration of new systems and ML research ideas, while retaining state of the art performance for current models. Pathways uses a sharded dataflow graph of asynchronous operators that consume and produce futures, and efficiently gang-schedules heterogeneous paral… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.12533v1-abstract-full').style.display = 'inline'; document.getElementById('2203.12533v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.12533v1-abstract-full" style="display: none;"> We present the design of a new large scale orchestration layer for accelerators. Our system, Pathways, is explicitly designed to enable exploration of new systems and ML research ideas, while retaining state of the art performance for current models. Pathways uses a sharded dataflow graph of asynchronous operators that consume and produce futures, and efficiently gang-schedules heterogeneous parallel computations on thousands of accelerators while coordinating data transfers over their dedicated interconnects. Pathways makes use of a novel asynchronous distributed dataflow design that lets the control plane execute in parallel despite dependencies in the data plane. This design, with careful engineering, allows Pathways to adopt a single-controller model that makes it easier to express complex new parallelism patterns. We demonstrate that Pathways can achieve performance parity (~100% accelerator utilization) with state-of-the-art systems when running SPMD computations over 2048 TPUs, while also delivering throughput comparable to the SPMD case for Transformer models that are pipelined across 16 stages, or sharded across two islands of accelerators connected over a data center network. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.12533v1-abstract-full').style.display = 'none'; document.getElementById('2203.12533v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">MLSys 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1805.01772">arXiv:1805.01772</a> <span> [<a href="https://arxiv.org/pdf/1805.01772">pdf</a>, <a href="https://arxiv.org/format/1805.01772">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3190508.3190551">10.1145/3190508.3190551 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Dynamic Control Flow in Large-Scale Machine Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+Y">Yuan Yu</a>, <a href="/search/cs?searchtype=author&query=Abadi%2C+M">Mart铆n Abadi</a>, <a href="/search/cs?searchtype=author&query=Barham%2C+P">Paul Barham</a>, <a href="/search/cs?searchtype=author&query=Brevdo%2C+E">Eugene Brevdo</a>, <a href="/search/cs?searchtype=author&query=Burrows%2C+M">Mike Burrows</a>, <a href="/search/cs?searchtype=author&query=Davis%2C+A">Andy Davis</a>, <a href="/search/cs?searchtype=author&query=Dean%2C+J">Jeff Dean</a>, <a href="/search/cs?searchtype=author&query=Ghemawat%2C+S">Sanjay Ghemawat</a>, <a href="/search/cs?searchtype=author&query=Harley%2C+T">Tim Harley</a>, <a href="/search/cs?searchtype=author&query=Hawkins%2C+P">Peter Hawkins</a>, <a href="/search/cs?searchtype=author&query=Isard%2C+M">Michael Isard</a>, <a href="/search/cs?searchtype=author&query=Kudlur%2C+M">Manjunath Kudlur</a>, <a href="/search/cs?searchtype=author&query=Monga%2C+R">Rajat Monga</a>, <a href="/search/cs?searchtype=author&query=Murray%2C+D">Derek Murray</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+X">Xiaoqiang Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1805.01772v1-abstract-short" style="display: inline;"> Many recent machine learning models rely on fine-grained dynamic control flow for training and inference. In particular, models based on recurrent neural networks and on reinforcement learning depend on recurrence relations, data-dependent conditional execution, and other features that call for dynamic control flow. These applications benefit from the ability to make rapid control-flow decisions a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1805.01772v1-abstract-full').style.display = 'inline'; document.getElementById('1805.01772v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1805.01772v1-abstract-full" style="display: none;"> Many recent machine learning models rely on fine-grained dynamic control flow for training and inference. In particular, models based on recurrent neural networks and on reinforcement learning depend on recurrence relations, data-dependent conditional execution, and other features that call for dynamic control flow. These applications benefit from the ability to make rapid control-flow decisions across a set of computing devices in a distributed system. For performance, scalability, and expressiveness, a machine learning system must support dynamic control flow in distributed and heterogeneous environments. This paper presents a programming model for distributed machine learning that supports dynamic control flow. We describe the design of the programming model, and its implementation in TensorFlow, a distributed machine learning system. Our approach extends the use of dataflow graphs to represent machine learning models, offering several distinctive features. First, the branches of conditionals and bodies of loops can be partitioned across many machines to run on a set of heterogeneous devices, including CPUs, GPUs, and custom ASICs. Second, programs written in our model support automatic differentiation and distributed gradient computations, which are necessary for training machine learning models that use control flow. Third, our choice of non-strict semantics enables multiple loop iterations to execute in parallel across machines, and to overlap compute and I/O operations. We have done our work in the context of TensorFlow, and it has been used extensively in research and production. We evaluate it using several real-world applications, and demonstrate its performance and scalability. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1805.01772v1-abstract-full').style.display = 'none'; document.getElementById('1805.01772v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 May, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Appeared in EuroSys 2018. 14 pages, 16 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> EuroSys 2018: Thirteenth EuroSys Conference, April 23-26, 2018, Porto, Portugal. ACM, New York, NY, USA </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1605.08695">arXiv:1605.08695</a> <span> [<a href="https://arxiv.org/pdf/1605.08695">pdf</a>, <a href="https://arxiv.org/format/1605.08695">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> TensorFlow: A system for large-scale machine learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Abadi%2C+M">Mart铆n Abadi</a>, <a href="/search/cs?searchtype=author&query=Barham%2C+P">Paul Barham</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jianmin Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhifeng Chen</a>, <a href="/search/cs?searchtype=author&query=Davis%2C+A">Andy Davis</a>, <a href="/search/cs?searchtype=author&query=Dean%2C+J">Jeffrey Dean</a>, <a href="/search/cs?searchtype=author&query=Devin%2C+M">Matthieu Devin</a>, <a href="/search/cs?searchtype=author&query=Ghemawat%2C+S">Sanjay Ghemawat</a>, <a href="/search/cs?searchtype=author&query=Irving%2C+G">Geoffrey Irving</a>, <a href="/search/cs?searchtype=author&query=Isard%2C+M">Michael Isard</a>, <a href="/search/cs?searchtype=author&query=Kudlur%2C+M">Manjunath Kudlur</a>, <a href="/search/cs?searchtype=author&query=Levenberg%2C+J">Josh Levenberg</a>, <a href="/search/cs?searchtype=author&query=Monga%2C+R">Rajat Monga</a>, <a href="/search/cs?searchtype=author&query=Moore%2C+S">Sherry Moore</a>, <a href="/search/cs?searchtype=author&query=Murray%2C+D+G">Derek G. Murray</a>, <a href="/search/cs?searchtype=author&query=Steiner%2C+B">Benoit Steiner</a>, <a href="/search/cs?searchtype=author&query=Tucker%2C+P">Paul Tucker</a>, <a href="/search/cs?searchtype=author&query=Vasudevan%2C+V">Vijay Vasudevan</a>, <a href="/search/cs?searchtype=author&query=Warden%2C+P">Pete Warden</a>, <a href="/search/cs?searchtype=author&query=Wicke%2C+M">Martin Wicke</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Y">Yuan Yu</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+X">Xiaoqiang Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1605.08695v2-abstract-short" style="display: inline;"> TensorFlow is a machine learning system that operates at large scale and in heterogeneous environments. TensorFlow uses dataflow graphs to represent computation, shared state, and the operations that mutate that state. It maps the nodes of a dataflow graph across many machines in a cluster, and within a machine across multiple computational devices, including multicore CPUs, general-purpose GPUs,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1605.08695v2-abstract-full').style.display = 'inline'; document.getElementById('1605.08695v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1605.08695v2-abstract-full" style="display: none;"> TensorFlow is a machine learning system that operates at large scale and in heterogeneous environments. TensorFlow uses dataflow graphs to represent computation, shared state, and the operations that mutate that state. It maps the nodes of a dataflow graph across many machines in a cluster, and within a machine across multiple computational devices, including multicore CPUs, general-purpose GPUs, and custom designed ASICs known as Tensor Processing Units (TPUs). This architecture gives flexibility to the application developer: whereas in previous "parameter server" designs the management of shared state is built into the system, TensorFlow enables developers to experiment with novel optimizations and training algorithms. TensorFlow supports a variety of applications, with particularly strong support for training and inference on deep neural networks. Several Google services use TensorFlow in production, we have released it as an open-source project, and it has become widely used for machine learning research. In this paper, we describe the TensorFlow dataflow model in contrast to existing systems, and demonstrate the compelling performance that TensorFlow achieves for several real-world applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1605.08695v2-abstract-full').style.display = 'none'; document.getElementById('1605.08695v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 May, 2016; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 May, 2016; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2016. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 9 figures; v2 has a spelling correction in the metadata</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1603.04467">arXiv:1603.04467</a> <span> [<a href="https://arxiv.org/pdf/1603.04467">pdf</a>, <a href="https://arxiv.org/format/1603.04467">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> TensorFlow: Large-Scale Machine Learning on Heterogeneous Distributed Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Abadi%2C+M">Mart铆n Abadi</a>, <a href="/search/cs?searchtype=author&query=Agarwal%2C+A">Ashish Agarwal</a>, <a href="/search/cs?searchtype=author&query=Barham%2C+P">Paul Barham</a>, <a href="/search/cs?searchtype=author&query=Brevdo%2C+E">Eugene Brevdo</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhifeng Chen</a>, <a href="/search/cs?searchtype=author&query=Citro%2C+C">Craig Citro</a>, <a href="/search/cs?searchtype=author&query=Corrado%2C+G+S">Greg S. Corrado</a>, <a href="/search/cs?searchtype=author&query=Davis%2C+A">Andy Davis</a>, <a href="/search/cs?searchtype=author&query=Dean%2C+J">Jeffrey Dean</a>, <a href="/search/cs?searchtype=author&query=Devin%2C+M">Matthieu Devin</a>, <a href="/search/cs?searchtype=author&query=Ghemawat%2C+S">Sanjay Ghemawat</a>, <a href="/search/cs?searchtype=author&query=Goodfellow%2C+I">Ian Goodfellow</a>, <a href="/search/cs?searchtype=author&query=Harp%2C+A">Andrew Harp</a>, <a href="/search/cs?searchtype=author&query=Irving%2C+G">Geoffrey Irving</a>, <a href="/search/cs?searchtype=author&query=Isard%2C+M">Michael Isard</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+Y">Yangqing Jia</a>, <a href="/search/cs?searchtype=author&query=Jozefowicz%2C+R">Rafal Jozefowicz</a>, <a href="/search/cs?searchtype=author&query=Kaiser%2C+L">Lukasz Kaiser</a>, <a href="/search/cs?searchtype=author&query=Kudlur%2C+M">Manjunath Kudlur</a>, <a href="/search/cs?searchtype=author&query=Levenberg%2C+J">Josh Levenberg</a>, <a href="/search/cs?searchtype=author&query=Mane%2C+D">Dan Mane</a>, <a href="/search/cs?searchtype=author&query=Monga%2C+R">Rajat Monga</a>, <a href="/search/cs?searchtype=author&query=Moore%2C+S">Sherry Moore</a>, <a href="/search/cs?searchtype=author&query=Murray%2C+D">Derek Murray</a>, <a href="/search/cs?searchtype=author&query=Olah%2C+C">Chris Olah</a> , et al. (15 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1603.04467v2-abstract-short" style="display: inline;"> TensorFlow is an interface for expressing machine learning algorithms, and an implementation for executing such algorithms. A computation expressed using TensorFlow can be executed with little or no change on a wide variety of heterogeneous systems, ranging from mobile devices such as phones and tablets up to large-scale distributed systems of hundreds of machines and thousands of computational de… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1603.04467v2-abstract-full').style.display = 'inline'; document.getElementById('1603.04467v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1603.04467v2-abstract-full" style="display: none;"> TensorFlow is an interface for expressing machine learning algorithms, and an implementation for executing such algorithms. A computation expressed using TensorFlow can be executed with little or no change on a wide variety of heterogeneous systems, ranging from mobile devices such as phones and tablets up to large-scale distributed systems of hundreds of machines and thousands of computational devices such as GPU cards. The system is flexible and can be used to express a wide variety of algorithms, including training and inference algorithms for deep neural network models, and it has been used for conducting research and for deploying machine learning systems into production across more than a dozen areas of computer science and other fields, including speech recognition, computer vision, robotics, information retrieval, natural language processing, geographic information extraction, and computational drug discovery. This paper describes the TensorFlow interface and an implementation of that interface that we have built at Google. The TensorFlow API and a reference implementation were released as an open-source package under the Apache 2.0 license in November, 2015 and are available at www.tensorflow.org. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1603.04467v2-abstract-full').style.display = 'none'; document.getElementById('1603.04467v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 March, 2016; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 March, 2016; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2016. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Version 2 updates only the metadata, to correct the formatting of Mart铆n Abadi's name</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1503.08877">arXiv:1503.08877</a> <span> [<a href="https://arxiv.org/pdf/1503.08877">pdf</a>, <a href="https://arxiv.org/format/1503.08877">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Falkirk Wheel: Rollback Recovery for Dataflow Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Isard%2C+M">Michael Isard</a>, <a href="/search/cs?searchtype=author&query=Abadi%2C+M">Mart铆n Abadi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1503.08877v1-abstract-short" style="display: inline;"> We present a new model for rollback recovery in distributed dataflow systems. We explain existing rollback schemes by assigning a logical time to each event such as a message delivery. If some processors fail during an execution, the system rolls back by selecting a set of logical times for each processor. The effect of events at times within the set is retained or restored from saved state, while… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1503.08877v1-abstract-full').style.display = 'inline'; document.getElementById('1503.08877v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1503.08877v1-abstract-full" style="display: none;"> We present a new model for rollback recovery in distributed dataflow systems. We explain existing rollback schemes by assigning a logical time to each event such as a message delivery. If some processors fail during an execution, the system rolls back by selecting a set of logical times for each processor. The effect of events at times within the set is retained or restored from saved state, while the effect of other events is undone and re-executed. We show that, by adopting different logical time "domains" at different processors, an application can adopt appropriate checkpointing schemes for different parts of its computation. We illustrate with an example of an application that combines batch processing with low-latency streaming updates. We show rules, and an algorithm, to determine a globally consistent state for rollback in a system that uses multiple logical time domains. We also introduce selective rollback at a processor, which can selectively preserve the effect of events at some logical times and not others, independent of the original order of execution of those events. Selective rollback permits new checkpointing policies that are particularly well suited to iterative streaming algorithms. We report on an implementation of our new framework in the context of the Naiad system. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1503.08877v1-abstract-full').style.display = 'none'; document.getElementById('1503.08877v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 March, 2015; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2015. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">DRAFT work in progress</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1212.4522">arXiv:1212.4522</a> <span> [<a href="https://arxiv.org/pdf/1212.4522">pdf</a>, <a href="https://arxiv.org/format/1212.4522">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> </div> <p class="title is-5 mathjax"> A Multi-View Embedding Space for Modeling Internet Images, Tags, and their Semantics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gong%2C+Y">Yunchao Gong</a>, <a href="/search/cs?searchtype=author&query=Ke%2C+Q">Qifa Ke</a>, <a href="/search/cs?searchtype=author&query=Isard%2C+M">Michael Isard</a>, <a href="/search/cs?searchtype=author&query=Lazebnik%2C+S">Svetlana Lazebnik</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1212.4522v2-abstract-short" style="display: inline;"> This paper investigates the problem of modeling Internet images and associated text or tags for tasks such as image-to-image search, tag-to-image search, and image-to-tag search (image annotation). We start with canonical correlation analysis (CCA), a popular and successful approach for mapping visual and textual features to the same latent space, and incorporate a third view capturing high-level… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1212.4522v2-abstract-full').style.display = 'inline'; document.getElementById('1212.4522v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1212.4522v2-abstract-full" style="display: none;"> This paper investigates the problem of modeling Internet images and associated text or tags for tasks such as image-to-image search, tag-to-image search, and image-to-tag search (image annotation). We start with canonical correlation analysis (CCA), a popular and successful approach for mapping visual and textual features to the same latent space, and incorporate a third view capturing high-level image semantics, represented either by a single category or multiple non-mutually-exclusive concepts. We present two ways to train the three-view embedding: supervised, with the third view coming from ground-truth labels or search keywords; and unsupervised, with semantic themes automatically obtained by clustering the tags. To ensure high accuracy for retrieval tasks while keeping the learning process scalable, we combine multiple strong visual features and use explicit nonlinear kernel mappings to efficiently approximate kernel CCA. To perform retrieval, we use a specially designed similarity function in the embedded space, which substantially outperforms the Euclidean distance. The resulting system produces compelling qualitative results and outperforms a number of two-view baselines on retrieval tasks on three large-scale Internet image datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1212.4522v2-abstract-full').style.display = 'none'; document.getElementById('1212.4522v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 September, 2013; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 December, 2012; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2012. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To Appear: International Journal of Computer Vision</span> </p> </li> </ol> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository