
Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. 
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;7 of 7 results for author: <span class="mathjax">Omernick, M</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Omernick%2C+M">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Omernick, M"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Omernick%2C+M&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Omernick, M"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.05530">arXiv:2403.05530</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.05530">pdf</a>, <a href="https://arxiv.org/format/2403.05530">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Gemini 1.5: Unlocking multimodal understanding across millions of tokens of context </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gemini+Team"> Gemini Team</a>, <a href="/search/cs?searchtype=author&amp;query=Georgiev%2C+P">Petko Georgiev</a>, <a href="/search/cs?searchtype=author&amp;query=Lei%2C+V+I">Ving Ian Lei</a>, <a href="/search/cs?searchtype=author&amp;query=Burnell%2C+R">Ryan Burnell</a>, <a href="/search/cs?searchtype=author&amp;query=Bai%2C+L">Libin Bai</a>, <a href="/search/cs?searchtype=author&amp;query=Gulati%2C+A">Anmol Gulati</a>, <a href="/search/cs?searchtype=author&amp;query=Tanzer%2C+G">Garrett Tanzer</a>, <a href="/search/cs?searchtype=author&amp;query=Vincent%2C+D">Damien Vincent</a>, <a href="/search/cs?searchtype=author&amp;query=Pan%2C+Z">Zhufeng Pan</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Shibo Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Mariooryad%2C+S">Soroosh Mariooryad</a>, <a href="/search/cs?searchtype=author&amp;query=Ding%2C+Y">Yifan Ding</a>, <a href="/search/cs?searchtype=author&amp;query=Geng%2C+X">Xinyang Geng</a>, <a href="/search/cs?searchtype=author&amp;query=Alcober%2C+F">Fred Alcober</a>, <a href="/search/cs?searchtype=author&amp;query=Frostig%2C+R">Roy Frostig</a>, <a href="/search/cs?searchtype=author&amp;query=Omernick%2C+M">Mark Omernick</a>, <a href="/search/cs?searchtype=author&amp;query=Walker%2C+L">Lexi Walker</a>, <a href="/search/cs?searchtype=author&amp;query=Paduraru%2C+C">Cosmin Paduraru</a>, <a href="/search/cs?searchtype=author&amp;query=Sorokin%2C+C">Christina Sorokin</a>, <a href="/search/cs?searchtype=author&amp;query=Tacchetti%2C+A">Andrea Tacchetti</a>, <a href="/search/cs?searchtype=author&amp;query=Gaffney%2C+C">Colin Gaffney</a>, <a href="/search/cs?searchtype=author&amp;query=Daruki%2C+S">Samira Daruki</a>, <a href="/search/cs?searchtype=author&amp;query=Sercinoglu%2C+O">Olcan Sercinoglu</a>, <a href="/search/cs?searchtype=author&amp;query=Gleicher%2C+Z">Zach Gleicher</a>, <a href="/search/cs?searchtype=author&amp;query=Love%2C+J">Juliette Love</a> , et al. 
(1110 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.05530v4-abstract-short" style="display: inline;"> In this report, we introduce the Gemini 1.5 family of models, representing the next generation of highly compute-efficient multimodal models capable of recalling and reasoning over fine-grained information from millions of tokens of context, including multiple long documents and hours of video and audio. The family includes two new models: (1) an updated Gemini 1.5 Pro, which exceeds the February&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.05530v4-abstract-full').style.display = 'inline'; document.getElementById('2403.05530v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.05530v4-abstract-full" style="display: none;"> In this report, we introduce the Gemini 1.5 family of models, representing the next generation of highly compute-efficient multimodal models capable of recalling and reasoning over fine-grained information from millions of tokens of context, including multiple long documents and hours of video and audio. The family includes two new models: (1) an updated Gemini 1.5 Pro, which exceeds the February version on the great majority of capabilities and benchmarks; (2) Gemini 1.5 Flash, a more lightweight variant designed for efficiency with minimal regression in quality. Gemini 1.5 models achieve near-perfect recall on long-context retrieval tasks across modalities, improve the state-of-the-art in long-document QA, long-video QA and long-context ASR, and match or surpass Gemini 1.0 Ultra&#39;s state-of-the-art performance across a broad set of benchmarks. Studying the limits of Gemini 1.5&#39;s long-context ability, we find continued improvement in next-token prediction and near-perfect retrieval (&gt;99%) up to at least 10M tokens, a generational leap over existing models such as Claude 3.0 (200k) and GPT-4 Turbo (128k). Finally, we highlight real-world use cases, such as Gemini 1.5 collaborating with professionals on completing their tasks achieving 26 to 75% time savings across 10 different job categories, as well as surprising new capabilities of large language models at the frontier; when given a grammar manual for Kalamang, a language with fewer than 200 speakers worldwide, the model learns to translate English to Kalamang at a similar level to a person who learned from the same content. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.05530v4-abstract-full').style.display = 'none'; document.getElementById('2403.05530v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.11805">arXiv:2312.11805</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2312.11805">pdf</a>, <a href="https://arxiv.org/format/2312.11805">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Gemini: A Family of Highly Capable Multimodal Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gemini+Team"> Gemini Team</a>, <a href="/search/cs?searchtype=author&amp;query=Anil%2C+R">Rohan Anil</a>, <a href="/search/cs?searchtype=author&amp;query=Borgeaud%2C+S">Sebastian Borgeaud</a>, <a href="/search/cs?searchtype=author&amp;query=Alayrac%2C+J">Jean-Baptiste Alayrac</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+J">Jiahui Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Soricut%2C+R">Radu Soricut</a>, <a href="/search/cs?searchtype=author&amp;query=Schalkwyk%2C+J">Johan Schalkwyk</a>, <a href="/search/cs?searchtype=author&amp;query=Dai%2C+A+M">Andrew M. Dai</a>, <a href="/search/cs?searchtype=author&amp;query=Hauth%2C+A">Anja Hauth</a>, <a href="/search/cs?searchtype=author&amp;query=Millican%2C+K">Katie Millican</a>, <a href="/search/cs?searchtype=author&amp;query=Silver%2C+D">David Silver</a>, <a href="/search/cs?searchtype=author&amp;query=Johnson%2C+M">Melvin Johnson</a>, <a href="/search/cs?searchtype=author&amp;query=Antonoglou%2C+I">Ioannis Antonoglou</a>, <a href="/search/cs?searchtype=author&amp;query=Schrittwieser%2C+J">Julian Schrittwieser</a>, <a href="/search/cs?searchtype=author&amp;query=Glaese%2C+A">Amelia Glaese</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Jilin Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Pitler%2C+E">Emily Pitler</a>, <a href="/search/cs?searchtype=author&amp;query=Lillicrap%2C+T">Timothy Lillicrap</a>, <a href="/search/cs?searchtype=author&amp;query=Lazaridou%2C+A">Angeliki Lazaridou</a>, <a href="/search/cs?searchtype=author&amp;query=Firat%2C+O">Orhan Firat</a>, <a href="/search/cs?searchtype=author&amp;query=Molloy%2C+J">James Molloy</a>, <a href="/search/cs?searchtype=author&amp;query=Isard%2C+M">Michael Isard</a>, <a href="/search/cs?searchtype=author&amp;query=Barham%2C+P+R">Paul R. Barham</a>, <a href="/search/cs?searchtype=author&amp;query=Hennigan%2C+T">Tom Hennigan</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+B">Benjamin Lee</a> , et al. (1325 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.11805v4-abstract-short" style="display: inline;"> This report introduces a new family of multimodal models, Gemini, that exhibit remarkable capabilities across image, audio, video, and text understanding. The Gemini family consists of Ultra, Pro, and Nano sizes, suitable for applications ranging from complex reasoning tasks to on-device memory-constrained use-cases. 
Evaluation on a broad range of benchmarks shows that our most-capable Gemini Ultr&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.11805v4-abstract-full').style.display = 'inline'; document.getElementById('2312.11805v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.11805v4-abstract-full" style="display: none;"> This report introduces a new family of multimodal models, Gemini, that exhibit remarkable capabilities across image, audio, video, and text understanding. The Gemini family consists of Ultra, Pro, and Nano sizes, suitable for applications ranging from complex reasoning tasks to on-device memory-constrained use-cases. Evaluation on a broad range of benchmarks shows that our most-capable Gemini Ultra model advances the state of the art in 30 of 32 of these benchmarks - notably being the first model to achieve human-expert performance on the well-studied exam benchmark MMLU, and improving the state of the art in every one of the 20 multimodal benchmarks we examined. We believe that the new capabilities of the Gemini family in cross-modal reasoning and language understanding will enable a wide variety of use cases. We discuss our approach toward post-training and deploying Gemini models responsibly to users through services including Gemini, Gemini Advanced, Google AI Studio, and Cloud Vertex AI. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.11805v4-abstract-full').style.display = 'none'; document.getElementById('2312.11805v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.10403">arXiv:2305.10403</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.10403">pdf</a>, <a href="https://arxiv.org/format/2305.10403">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> PaLM 2 Technical Report </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Anil%2C+R">Rohan Anil</a>, <a href="/search/cs?searchtype=author&amp;query=Dai%2C+A+M">Andrew M. 
Dai</a>, <a href="/search/cs?searchtype=author&amp;query=Firat%2C+O">Orhan Firat</a>, <a href="/search/cs?searchtype=author&amp;query=Johnson%2C+M">Melvin Johnson</a>, <a href="/search/cs?searchtype=author&amp;query=Lepikhin%2C+D">Dmitry Lepikhin</a>, <a href="/search/cs?searchtype=author&amp;query=Passos%2C+A">Alexandre Passos</a>, <a href="/search/cs?searchtype=author&amp;query=Shakeri%2C+S">Siamak Shakeri</a>, <a href="/search/cs?searchtype=author&amp;query=Taropa%2C+E">Emanuel Taropa</a>, <a href="/search/cs?searchtype=author&amp;query=Bailey%2C+P">Paige Bailey</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zhifeng Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Chu%2C+E">Eric Chu</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+J+H">Jonathan H. Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Shafey%2C+L+E">Laurent El Shafey</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+Y">Yanping Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Meier-Hellstern%2C+K">Kathy Meier-Hellstern</a>, <a href="/search/cs?searchtype=author&amp;query=Mishra%2C+G">Gaurav Mishra</a>, <a href="/search/cs?searchtype=author&amp;query=Moreira%2C+E">Erica Moreira</a>, <a href="/search/cs?searchtype=author&amp;query=Omernick%2C+M">Mark Omernick</a>, <a href="/search/cs?searchtype=author&amp;query=Robinson%2C+K">Kevin Robinson</a>, <a href="/search/cs?searchtype=author&amp;query=Ruder%2C+S">Sebastian Ruder</a>, <a href="/search/cs?searchtype=author&amp;query=Tay%2C+Y">Yi Tay</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+K">Kefan Xiao</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+Y">Yuanzhong Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yujing Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Abrego%2C+G+H">Gustavo Hernandez Abrego</a> , et al. (103 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.10403v3-abstract-short" style="display: inline;"> We introduce PaLM 2, a new state-of-the-art language model that has better multilingual and reasoning capabilities and is more compute-efficient than its predecessor PaLM. PaLM 2 is a Transformer-based model trained using a mixture of objectives. Through extensive evaluations on English and multilingual language, and reasoning tasks, we demonstrate that PaLM 2 has significantly improved quality on&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.10403v3-abstract-full').style.display = 'inline'; document.getElementById('2305.10403v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.10403v3-abstract-full" style="display: none;"> We introduce PaLM 2, a new state-of-the-art language model that has better multilingual and reasoning capabilities and is more compute-efficient than its predecessor PaLM. PaLM 2 is a Transformer-based model trained using a mixture of objectives. Through extensive evaluations on English and multilingual language, and reasoning tasks, we demonstrate that PaLM 2 has significantly improved quality on downstream tasks across different model sizes, while simultaneously exhibiting faster and more efficient inference compared to PaLM. 
This improved efficiency enables broader deployment while also allowing the model to respond faster, for a more natural pace of interaction. PaLM 2 demonstrates robust reasoning capabilities exemplified by large improvements over PaLM on BIG-Bench and other reasoning tasks. PaLM 2 exhibits stable performance on a suite of responsible AI evaluations, and enables inference-time control over toxicity without additional overhead or impact on other capabilities. Overall, PaLM 2 achieves state-of-the-art performance across a diverse set of tasks and capabilities. When discussing the PaLM 2 family, it is important to distinguish between pre-trained models (of various sizes), fine-tuned variants of these models, and the user-facing products that use these models. In particular, user-facing products typically include additional pre- and post-processing steps. Additionally, the underlying models may evolve over time. Therefore, one should not expect the performance of user-facing products to exactly match the results reported in this report. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.10403v3-abstract-full').style.display = 'none'; document.getElementById('2305.10403v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.02311">arXiv:2204.02311</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2204.02311">pdf</a>, <a href="https://arxiv.org/format/2204.02311">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> PaLM: Scaling Language Modeling with Pathways </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chowdhery%2C+A">Aakanksha Chowdhery</a>, <a href="/search/cs?searchtype=author&amp;query=Narang%2C+S">Sharan Narang</a>, <a href="/search/cs?searchtype=author&amp;query=Devlin%2C+J">Jacob Devlin</a>, <a href="/search/cs?searchtype=author&amp;query=Bosma%2C+M">Maarten Bosma</a>, <a href="/search/cs?searchtype=author&amp;query=Mishra%2C+G">Gaurav Mishra</a>, <a href="/search/cs?searchtype=author&amp;query=Roberts%2C+A">Adam Roberts</a>, <a href="/search/cs?searchtype=author&amp;query=Barham%2C+P">Paul Barham</a>, <a href="/search/cs?searchtype=author&amp;query=Chung%2C+H+W">Hyung Won Chung</a>, <a href="/search/cs?searchtype=author&amp;query=Sutton%2C+C">Charles Sutton</a>, <a href="/search/cs?searchtype=author&amp;query=Gehrmann%2C+S">Sebastian Gehrmann</a>, <a href="/search/cs?searchtype=author&amp;query=Schuh%2C+P">Parker Schuh</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+K">Kensen Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Tsvyashchenko%2C+S">Sasha Tsvyashchenko</a>, <a href="/search/cs?searchtype=author&amp;query=Maynez%2C+J">Joshua Maynez</a>, <a href="/search/cs?searchtype=author&amp;query=Rao%2C+A">Abhishek Rao</a>, <a href="/search/cs?searchtype=author&amp;query=Barnes%2C+P">Parker Barnes</a>, <a 
href="/search/cs?searchtype=author&amp;query=Tay%2C+Y">Yi Tay</a>, <a href="/search/cs?searchtype=author&amp;query=Shazeer%2C+N">Noam Shazeer</a>, <a href="/search/cs?searchtype=author&amp;query=Prabhakaran%2C+V">Vinodkumar Prabhakaran</a>, <a href="/search/cs?searchtype=author&amp;query=Reif%2C+E">Emily Reif</a>, <a href="/search/cs?searchtype=author&amp;query=Du%2C+N">Nan Du</a>, <a href="/search/cs?searchtype=author&amp;query=Hutchinson%2C+B">Ben Hutchinson</a>, <a href="/search/cs?searchtype=author&amp;query=Pope%2C+R">Reiner Pope</a>, <a href="/search/cs?searchtype=author&amp;query=Bradbury%2C+J">James Bradbury</a>, <a href="/search/cs?searchtype=author&amp;query=Austin%2C+J">Jacob Austin</a> , et al. (42 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.02311v5-abstract-short" style="display: inline;"> Large language models have been shown to achieve remarkable performance across a variety of natural language tasks using few-shot learning, which drastically reduces the number of task-specific training examples needed to adapt the model to a particular application. To further our understanding of the impact of scale on few-shot learning, we trained a 540-billion parameter, densely activated, Tran&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.02311v5-abstract-full').style.display = 'inline'; document.getElementById('2204.02311v5-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.02311v5-abstract-full" style="display: none;"> Large language models have been shown to achieve remarkable performance across a variety of natural language tasks using few-shot learning, which drastically reduces the number of task-specific training examples needed to adapt the model to a particular application. To further our understanding of the impact of scale on few-shot learning, we trained a 540-billion parameter, densely activated, Transformer language model, which we call Pathways Language Model PaLM. We trained PaLM on 6144 TPU v4 chips using Pathways, a new ML system which enables highly efficient training across multiple TPU Pods. We demonstrate continued benefits of scaling by achieving state-of-the-art few-shot learning results on hundreds of language understanding and generation benchmarks. On a number of these tasks, PaLM 540B achieves breakthrough performance, outperforming the finetuned state-of-the-art on a suite of multi-step reasoning tasks, and outperforming average human performance on the recently released BIG-bench benchmark. A significant number of BIG-bench tasks showed discontinuous improvements from model scale, meaning that performance steeply increased as we scaled to our largest model. PaLM also has strong capabilities in multilingual tasks and source code generation, which we demonstrate on a wide array of benchmarks. We additionally provide a comprehensive analysis on bias and toxicity, and study the extent of training data memorization with respect to model scale. Finally, we discuss the ethical considerations related to large language models and discuss potential mitigation strategies. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.02311v5-abstract-full').style.display = 'none'; document.getElementById('2204.02311v5-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.17189">arXiv:2203.17189</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2203.17189">pdf</a>, <a href="https://arxiv.org/format/2203.17189">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Scaling Up Models and Data with $\texttt{t5x}$ and $\texttt{seqio}$ </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Roberts%2C+A">Adam Roberts</a>, <a href="/search/cs?searchtype=author&amp;query=Chung%2C+H+W">Hyung Won Chung</a>, <a href="/search/cs?searchtype=author&amp;query=Levskaya%2C+A">Anselm Levskaya</a>, <a href="/search/cs?searchtype=author&amp;query=Mishra%2C+G">Gaurav Mishra</a>, <a href="/search/cs?searchtype=author&amp;query=Bradbury%2C+J">James Bradbury</a>, <a href="/search/cs?searchtype=author&amp;query=Andor%2C+D">Daniel Andor</a>, <a href="/search/cs?searchtype=author&amp;query=Narang%2C+S">Sharan Narang</a>, <a href="/search/cs?searchtype=author&amp;query=Lester%2C+B">Brian Lester</a>, <a href="/search/cs?searchtype=author&amp;query=Gaffney%2C+C">Colin Gaffney</a>, <a href="/search/cs?searchtype=author&amp;query=Mohiuddin%2C+A">Afroz Mohiuddin</a>, <a href="/search/cs?searchtype=author&amp;query=Hawthorne%2C+C">Curtis Hawthorne</a>, <a href="/search/cs?searchtype=author&amp;query=Lewkowycz%2C+A">Aitor Lewkowycz</a>, <a href="/search/cs?searchtype=author&amp;query=Salcianu%2C+A">Alex Salcianu</a>, <a href="/search/cs?searchtype=author&amp;query=van+Zee%2C+M">Marc van Zee</a>, <a href="/search/cs?searchtype=author&amp;query=Austin%2C+J">Jacob Austin</a>, <a href="/search/cs?searchtype=author&amp;query=Goodman%2C+S">Sebastian Goodman</a>, <a href="/search/cs?searchtype=author&amp;query=Soares%2C+L+B">Livio Baldini Soares</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+H">Haitang Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Tsvyashchenko%2C+S">Sasha Tsvyashchenko</a>, <a href="/search/cs?searchtype=author&amp;query=Chowdhery%2C+A">Aakanksha Chowdhery</a>, <a href="/search/cs?searchtype=author&amp;query=Bastings%2C+J">Jasmijn Bastings</a>, <a href="/search/cs?searchtype=author&amp;query=Bulian%2C+J">Jannis Bulian</a>, <a href="/search/cs?searchtype=author&amp;query=Garcia%2C+X">Xavier Garcia</a>, <a href="/search/cs?searchtype=author&amp;query=Ni%2C+J">Jianmo Ni</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+A">Andrew Chen</a> , et al. 
(18 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.17189v1-abstract-short" style="display: inline;"> Recent neural network-based language models have benefited greatly from scaling up the size of training datasets and the number of parameters in the models themselves. Scaling can be complicated due to various factors including the need to distribute computation on supercomputer clusters (e.g., TPUs), prevent bottlenecks when infeeding data, and ensure reproducible results. In this work, we presen&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.17189v1-abstract-full').style.display = 'inline'; document.getElementById('2203.17189v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.17189v1-abstract-full" style="display: none;"> Recent neural network-based language models have benefited greatly from scaling up the size of training datasets and the number of parameters in the models themselves. Scaling can be complicated due to various factors including the need to distribute computation on supercomputer clusters (e.g., TPUs), prevent bottlenecks when infeeding data, and ensure reproducible results. In this work, we present two software libraries that ease these issues: $\texttt{t5x}$ simplifies the process of building and training large language models at scale while maintaining ease of use, and $\texttt{seqio}$ provides a task-based API for simple creation of fast and reproducible training data and evaluation pipelines. These open-source libraries have been used to train models with hundreds of billions of parameters on datasets with multiple terabytes of training data. Along with the libraries, we release configurations and instructions for T5-like encoder-decoder models as well as GPT-like decoder-only architectures. $\texttt{t5x}$ and $\texttt{seqio}$ are open source and available at https://github.com/google-research/t5x and https://github.com/google/seqio, respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.17189v1-abstract-full').style.display = 'none'; document.getElementById('2203.17189v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1704.04760">arXiv:1704.04760</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1704.04760">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> </div> <p class="title is-5 mathjax"> In-Datacenter Performance Analysis of a Tensor Processing Unit </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jouppi%2C+N+P">Norman P. 
6. arXiv:1704.04760 [pdf] (cs.AR, cs.LG, cs.NE) https://arxiv.org/abs/1704.04760
   Title: In-Datacenter Performance Analysis of a Tensor Processing Unit
   Authors: Norman P. Jouppi, Cliff Young, Nishant Patil, David Patterson, Gaurav Agrawal, Raminder Bajwa, Sarah Bates, Suresh Bhatia, Nan Boden, Al Borchers, Rick Boyle, Pierre-luc Cantin, Clifford Chao, Chris Clark, Jeremy Coriell, Mike Daley, Matt Dau, Jeffrey Dean, Ben Gelb, Tara Vazir Ghaemmaghami, Rajendra Gottipati, William Gulland, Robert Hagmann, C. Richard Ho, Doug Hogberg, et al. (50 additional authors not shown)
   Abstract: Many architects believe that major improvements in cost-energy-performance must now come from domain-specific hardware. This paper evaluates a custom ASIC, called a Tensor Processing Unit (TPU), deployed in datacenters since 2015 that accelerates the inference phase of neural networks (NN). The heart of the TPU is a 65,536 8-bit MAC matrix multiply unit that offers a peak throughput of 92 TeraOps/second (TOPS) and a large (28 MiB) software-managed on-chip memory. The TPU's deterministic execution model is a better match to the 99th-percentile response-time requirement of our NN applications than are the time-varying optimizations of CPUs and GPUs (caches, out-of-order execution, multithreading, multiprocessing, prefetching, ...) that help average throughput more than guaranteed latency. The lack of such features helps explain why, despite having myriad MACs and a big memory, the TPU is relatively small and low power. We compare the TPU to a server-class Intel Haswell CPU and an Nvidia K80 GPU, which are contemporaries deployed in the same datacenters. Our workload, written in the high-level TensorFlow framework, uses production NN applications (MLPs, CNNs, and LSTMs) that represent 95% of our datacenters' NN inference demand. Despite low utilization for some applications, the TPU is on average about 15X - 30X faster than its contemporary GPU or CPU, with TOPS/Watt about 30X - 80X higher. Moreover, using the GPU's GDDR5 memory in the TPU would triple achieved TOPS and raise TOPS/Watt to nearly 70X the GPU and 200X the CPU.
   Note: a worked check of the quoted 92 TOPS peak-throughput figure appears after this results list.
   Submitted: 16 April, 2017; originally announced April 2017.
   Comments: 17 pages, 11 figures, 8 tables. To appear at the 44th International Symposium on Computer Architecture (ISCA), Toronto, Canada, June 24-28, 2017
class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1703.04929v1-abstract-short" style="display: inline;"> We describe a baseline dependency parsing system for the CoNLL2017 Shared Task. This system, which we call &#34;ParseySaurus,&#34; uses the DRAGNN framework [Kong et al, 2017] to combine transition-based recurrent parsing and tagging with character-based word representations. On the v1.3 Universal Dependencies Treebanks, the new system outpeforms the publicly available, state-of-the-art &#34;Parsey&#39;s Cousins&#34;&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1703.04929v1-abstract-full').style.display = 'inline'; document.getElementById('1703.04929v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1703.04929v1-abstract-full" style="display: none;"> We describe a baseline dependency parsing system for the CoNLL2017 Shared Task. This system, which we call &#34;ParseySaurus,&#34; uses the DRAGNN framework [Kong et al, 2017] to combine transition-based recurrent parsing and tagging with character-based word representations. On the v1.3 Universal Dependencies Treebanks, the new system outpeforms the publicly available, state-of-the-art &#34;Parsey&#39;s Cousins&#34; models by 3.47% absolute Labeled Accuracy Score (LAS) across 52 treebanks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1703.04929v1-abstract-full').style.display = 'none'; document.getElementById('1703.04929v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 March, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2017. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Tech report</span> </p> </li> </ol> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 
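For entry 6, a quick sanity check of the quoted 92 TOPS peak figure. The 65,536 MACs come from the abstract; the 700 MHz clock rate and the convention of counting each multiply-accumulate as two operations are assumptions taken from the TPU paper itself, not from this listing.

    # Sanity check (assumptions: 700 MHz clock and 2 ops per MAC, per the TPU paper).
    macs = 256 * 256          # 65,536 8-bit MACs in the matrix multiply unit
    clock_hz = 700e6          # assumed clock rate
    ops_per_mac = 2           # one multiply plus one add
    peak_tops = macs * ops_per_mac * clock_hz / 1e12
    print(f"peak throughput ~= {peak_tops:.0f} TOPS")   # ~92 TOPS, matching the abstract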
