Search | arXiv e-print repository
Showing 1–21 of 21 results for author: de Jong, M
Searching in archive cs. Search in all archives: https://arxiv.org/search/?searchtype=author&query=de+Jong%2C+M
Sorted by announcement date (newest first); 50 results per page.
1. arXiv:2407.19865 (https://arxiv.org/abs/2407.19865) [pdf, other]
   Categories: cs.AI (Artificial Intelligence); cs.LG (Machine Learning); eess.SY (Systems and Control)
   Imitation Learning for Intra-Day Power Grid Operation through Topology Actions
   Authors: Matthijs de Jong, Jan Viebahn, Yuliya Shapovalova
   Abstract: Power grid operation is becoming increasingly complex due to the increase in generation of renewable energy. The recent series of Learning To Run a Power Network (L2RPN) competitions have encouraged the use of artificial agents to assist human dispatchers in operating power grids. In this paper we study the performance of imitation learning for day-ahead power grid operation through topology actions. In particular, we consider two rule-based expert agents: a greedy agent and an N-1 agent. While the latter is more computationally expensive since it takes N-1 safety considerations into account, it exhibits a much higher operational performance. We train a fully-connected neural network (FCNN) on expert state-action pairs and evaluate it in two ways. First, we find that classification accuracy is limited despite extensive hyperparameter tuning, due to class imbalance and class overlap. Second, as a power system agent, the FCNN performs only slightly worse than expert agents. Furthermore, hybrid agents, which incorporate minimal additional simulations, match expert agents' performance with significantly lower computational cost. Consequently, imitation learning shows promise for developing fast, high-performing power grid agents, motivating its further exploration in future L2RPN studies.
   Submitted 18 August, 2024; v1 submitted 29 July, 2024; originally announced July 2024.
   Comments: To be presented at the Machine Learning for Sustainable Power Systems 2024 workshop and to be published in the corresponding Springer Communications in Computer and Information Science proceedings.
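   A minimal sketch of the behavior-cloning setup this abstract describes: an FCNN classifier trained on expert state-action pairs. The feature and action dimensions, network shape, and synthetic data below are illustrative assumptions, not taken from the paper.

```python
# Behavior cloning on expert state-action pairs (PyTorch).
# All sizes and the random "expert data" are placeholders.
import torch
import torch.nn as nn

class FCNNPolicy(nn.Module):
    def __init__(self, obs_dim: int, n_actions: int, hidden: int = 256):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(obs_dim, hidden), nn.ReLU(),
            nn.Linear(hidden, hidden), nn.ReLU(),
            nn.Linear(hidden, n_actions),  # logits over topology actions
        )

    def forward(self, obs: torch.Tensor) -> torch.Tensor:
        return self.net(obs)

obs = torch.randn(1024, 128)             # placeholder grid observations
actions = torch.randint(0, 50, (1024,))  # placeholder expert action ids

policy = FCNNPolicy(obs_dim=128, n_actions=50)
opt = torch.optim.Adam(policy.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()          # class weights could counter imbalance

for _ in range(10):                      # a few passes over the expert pairs
    opt.zero_grad()
    loss = loss_fn(policy(obs), actions)
    loss.backward()
    opt.step()
```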
2. arXiv:2407.07896 (https://arxiv.org/abs/2407.07896) [pdf, other]
   Categories: physics.optics (Optics); cond-mat.mes-hall (Mesoscale and Nanoscale Physics); cs.LG (Machine Learning); physics.app-ph (Applied Physics); physics.space-ph (Space Physics)
   Pentagonal Photonic Crystal Mirrors: Scalable Lightsails with Enhanced Acceleration via Neural Topology Optimization
   Authors: L. Norder, S. Yin, M. J. de Jong, F. Stallone, H. Aydogmus, P. M. Sberna, M. A. Bessa, R. A. Norte
   Abstract: The Starshot Breakthrough Initiative aims to send one-gram microchip probes to Alpha Centauri within 20 years, using gram-scale lightsails propelled by laser-based radiation pressure, reaching velocities nearing a fifth of light speed. This mission requires lightsail materials that challenge the fundamentals of nanotechnology, requiring innovations in optics, material science and structural engineering. Unlike the microchip payload, which must be minimized in every dimension, such lightsails need meter-scale dimensions with nanoscale thickness and billions of nanoscale holes to enhance reflectivity and reduce mass. Our study employs neural topology optimization, revealing a novel pentagonal lattice-based photonic crystal (PhC) reflector. The optimized designs shorten acceleration times, therefore lowering launch costs significantly. Crucially, these designs also enable lightsail material fabrication with orders-of-magnitude reduction in costs. We have fabricated a 60 x 60 mm^2, 200 nm thick, single-layer reflector perforated with over a billion nanoscale features; the highest aspect-ratio nanophotonic element to date. We achieve this with nearly 9,000 times cost reduction per m^2. Starshot lightsails will have several stringent requirements but will ultimately be driven by costs to build at scale. Here we highlight challenges and possible solutions in developing lightsail materials - showcasing the potential of scaling nanophotonics for cost-effective next-generation space exploration.
   Submitted 10 July, 2024; originally announced July 2024.
3. arXiv:2308.14903 (https://arxiv.org/abs/2308.14903) [pdf, other]
   Categories: cs.CL (Computation and Language)
   MEMORY-VQ: Compression for Tractable Internet-Scale Memory
   Authors: Yury Zemlyanskiy, Michiel de Jong, Luke Vilnis, Santiago Ontañón, William W. Cohen, Sumit Sanghai, Joshua Ainslie
   Abstract: Retrieval augmentation is a powerful but expensive method to make language models more knowledgeable about the world. Memory-based methods like LUMEN pre-compute token representations for retrieved passages to drastically speed up inference. However, memory also leads to much greater storage requirements from storing pre-computed representations. We propose MEMORY-VQ, a new method to reduce storage requirements of memory-augmented models without sacrificing performance. Our method uses a vector quantization variational autoencoder (VQ-VAE) to compress token representations. We apply MEMORY-VQ to the LUMEN model to obtain LUMEN-VQ, a memory model that achieves a 16x compression rate with comparable performance on the KILT benchmark. LUMEN-VQ enables practical retrieval augmentation even for extremely large retrieval corpora.
   Submitted 28 August, 2023; originally announced August 2023.
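   The core compression idea (replacing dense pre-computed vectors with small codebook indices) can be sketched as plain vector quantization. The codebook here is random rather than trained with a VQ-VAE, and all sizes are illustrative:

```python
# Vector-quantize a memory of pre-computed token vectors: store each
# vector as the index of its nearest codebook entry, then reconstruct
# by lookup. A stand-in for the trained VQ-VAE in the paper.
import numpy as np

rng = np.random.default_rng(0)
codebook = rng.normal(size=(1024, 64))   # 1024 code vectors (would be learned)
memory = rng.normal(size=(10_000, 64))   # pre-computed token representations

# Compress: nearest codebook entry per vector (argmin squared distance).
d2 = ((memory ** 2).sum(1, keepdims=True)
      - 2 * memory @ codebook.T
      + (codebook ** 2).sum(1))
codes = d2.argmin(axis=1).astype(np.uint16)  # 2 bytes/token vs. 64 floats

# Decompress at inference time by codebook lookup.
reconstructed = codebook[codes]
```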
4. arXiv:2306.10231 (https://arxiv.org/abs/2306.10231) [pdf, other]
   Categories: cs.CL (Computation and Language); cs.AI (Artificial Intelligence); cs.LG (Machine Learning)
   GLIMMER: generalized late-interaction memory reranker
   Authors: Michiel de Jong, Yury Zemlyanskiy, Nicholas FitzGerald, Sumit Sanghai, William W. Cohen, Joshua Ainslie
   Abstract: Memory-augmentation is a powerful approach for efficiently incorporating external information into language models, but leads to reduced performance relative to retrieving text. Recent work introduced LUMEN, a memory-retrieval hybrid that partially pre-computes memory and updates memory representations on the fly with a smaller live encoder. We propose GLIMMER, which improves on this approach through 1) exploiting free access to the powerful memory representations by applying a shallow reranker on top of memory to drastically improve retrieval quality at low cost, and 2) incorporating multi-task training to learn a general and higher quality memory and live encoder. GLIMMER achieves strong gains in performance at faster speeds compared to LUMEN and FiD on the KILT benchmark of knowledge-intensive tasks.
   Submitted 16 June, 2023; originally announced June 2023.
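   Because the memory representations are already available, a shallow reranker can reorder retrieved candidates cheaply before the expensive live encoding runs. The max-sim late-interaction score below is one plausible instantiation for illustration, not the paper's exact reranker:

```python
# Rerank retrieved memories with a cheap late-interaction score:
# for each query token, take its best-matching memory token and sum.
# Shapes and the scoring rule are illustrative assumptions.
import numpy as np

def late_interaction_score(query_vecs, passage_vecs):
    sims = query_vecs @ passage_vecs.T      # (q_len, p_len) similarities
    return sims.max(axis=1).sum()           # max over memory tokens, sum over query

rng = np.random.default_rng(0)
query = rng.normal(size=(16, 64))           # live-encoded question tokens
memories = [rng.normal(size=(128, 64)) for _ in range(20)]  # retrieved memories

order = sorted(range(len(memories)),
               key=lambda i: late_interaction_score(query, memories[i]),
               reverse=True)
top_k = order[:4]                           # only these reach the live encoder
```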
5. arXiv:2305.13245 (https://arxiv.org/abs/2305.13245) [pdf, other]
   Categories: cs.CL (Computation and Language); cs.LG (Machine Learning)
   GQA: Training Generalized Multi-Query Transformer Models from Multi-Head Checkpoints
   Authors: Joshua Ainslie, James Lee-Thorp, Michiel de Jong, Yury Zemlyanskiy, Federico Lebrón, Sumit Sanghai
   Abstract: Multi-query attention (MQA), which only uses a single key-value head, drastically speeds up decoder inference. However, MQA can lead to quality degradation, and moreover it may not be desirable to train a separate model just for faster inference. We (1) propose a recipe for uptraining existing multi-head language model checkpoints into models with MQA using 5% of original pre-training compute, and (2) introduce grouped-query attention (GQA), a generalization of multi-query attention which uses an intermediate (more than one, less than number of query heads) number of key-value heads. We show that uptrained GQA achieves quality close to multi-head attention with comparable speed to MQA.
   Submitted 23 December, 2023; v1 submitted 22 May, 2023; originally announced May 2023.
   Comments: Accepted at EMNLP 2023. Added to related work.
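   Grouped-query attention, as defined in this abstract, interpolates between multi-head attention (one key-value head per query head) and multi-query attention (one key-value head total) by sharing each key-value head across a group of query heads. A minimal NumPy sketch with illustrative shapes:

```python
# Grouped-query attention: query head h uses the KV head of its group.
# 8 query heads sharing 2 KV heads; MHA would use 8 groups, MQA uses 1.
import numpy as np

def gqa(q, k, v, n_groups):
    # q: (n_q_heads, seq, d); k, v: (n_groups, seq, d)
    n_q_heads, seq, d = q.shape
    per_group = n_q_heads // n_groups
    out = np.empty_like(q)
    for h in range(n_q_heads):
        g = h // per_group                        # shared KV head for this group
        scores = q[h] @ k[g].T / np.sqrt(d)
        weights = np.exp(scores - scores.max(-1, keepdims=True))
        weights /= weights.sum(-1, keepdims=True) # softmax over keys
        out[h] = weights @ v[g]
    return out

rng = np.random.default_rng(0)
q = rng.normal(size=(8, 32, 64))
k = rng.normal(size=(2, 32, 64))
v = rng.normal(size=(2, 32, 64))
y = gqa(q, k, v, n_groups=2)
```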
6. arXiv:2303.09752 (https://arxiv.org/abs/2303.09752) [pdf, other]
   Categories: cs.CL (Computation and Language); cs.LG (Machine Learning)
   CoLT5: Faster Long-Range Transformers with Conditional Computation
   Authors: Joshua Ainslie, Tao Lei, Michiel de Jong, Santiago Ontañón, Siddhartha Brahma, Yury Zemlyanskiy, David Uthus, Mandy Guo, James Lee-Thorp, Yi Tay, Yun-Hsuan Sung, Sumit Sanghai
   Abstract: Many natural language processing tasks benefit from long inputs, but processing long documents with Transformers is expensive -- not only due to quadratic attention complexity but also from applying feedforward and projection layers to every token. However, not all tokens are equally important, especially for longer documents. We propose CoLT5, a long-input Transformer model that builds on this intuition by employing conditional computation, devoting more resources to important tokens in both feedforward and attention layers. We show that CoLT5 achieves stronger performance than LongT5 with much faster training and inference, achieving SOTA on the long-input SCROLLS benchmark. Moreover, CoLT5 can effectively and tractably make use of extremely long inputs, showing strong gains up to 64k input length.
   Submitted 23 October, 2023; v1 submitted 16 March, 2023; originally announced March 2023.
   Comments: Accepted at EMNLP 2023.
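   The conditional-computation idea, a router sending a small subset of "important" tokens through a heavy branch while every token takes a light branch, can be sketched as below; the linear router, branch widths, and score-scaling are illustrative assumptions rather than the paper's exact layers:

```python
# Routed feedforward sketch: all 512 tokens pass through a cheap branch,
# and only the 64 highest-scoring tokens also get the expensive branch,
# scaled by their router score. Sizes are placeholders.
import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=(512, 64))              # token representations
w_route = rng.normal(size=(64,))            # router weights
w_light = rng.normal(size=(64, 64)) * 0.05  # light branch (all tokens)
w_heavy = rng.normal(size=(64, 64)) * 0.05  # heavy branch (routed tokens)

scores = x @ w_route
top = np.argsort(scores)[-64:]              # indices of routed tokens

out = x @ w_light                           # light path for everyone
out[top] += scores[top, None] * (x[top] @ w_heavy)  # heavy path, score-scaled
```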
7. arXiv:2301.10448 (https://arxiv.org/abs/2301.10448) [pdf, other]
   Categories: cs.CL (Computation and Language); cs.AI (Artificial Intelligence); cs.LG (Machine Learning)
   Pre-computed memory or on-the-fly encoding? A hybrid approach to retrieval augmentation makes the most of your compute
   Authors: Michiel de Jong, Yury Zemlyanskiy, Nicholas FitzGerald, Joshua Ainslie, Sumit Sanghai, Fei Sha, William Cohen
   Abstract: Retrieval-augmented language models such as Fusion-in-Decoder are powerful, setting the state of the art on a variety of knowledge-intensive tasks. However, they are also expensive, due to the need to encode a large number of retrieved passages. Some work avoids this cost by pre-encoding a text corpus into a memory and retrieving dense representations directly. However, pre-encoding memory incurs a severe quality penalty as the memory representations are not conditioned on the current input. We propose LUMEN, a hybrid between these two extremes, pre-computing the majority of the retrieval representation and completing the encoding on the fly using a live encoder that is conditioned on the question and fine-tuned for the task. We show that LUMEN significantly outperforms pure memory on multiple question-answering tasks while being much cheaper than FiD, and outperforms both for any given compute budget. Moreover, the advantage of LUMEN over FiD increases with model size.
   Submitted 2 June, 2023; v1 submitted 25 January, 2023; originally announced January 2023.
   Comments: ICML 2023.
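   The hybrid split described here, with most of the passage encoding pre-computed offline and a small question-conditioned live encoder finishing the job at query time, in toy form; both "encoders" are placeholders, not the paper's T5-based models:

```python
# LUMEN-style split: a large encoder runs once per corpus passage
# offline; a small live step conditions the stored vectors on the
# question at inference time. Everything here is a stand-in.
import numpy as np

rng = np.random.default_rng(0)

def large_offline_encoder(passage_tokens):   # run once per passage, stored
    return rng.normal(size=(len(passage_tokens), 64))

def small_live_encoder(question_vecs, memory_vecs):
    # Cheap question-conditioned update of pre-computed representations.
    attn = memory_vecs @ question_vecs.T     # (p_len, q_len)
    return memory_vecs + attn @ question_vecs / question_vecs.shape[0]

corpus = [["tok"] * 128 for _ in range(1000)]
memory = [large_offline_encoder(p) for p in corpus]   # pre-computed memory

question = rng.normal(size=(16, 64))         # encoded at query time
retrieved = memory[42]                       # dense retrieval not shown
final = small_live_encoder(question, retrieved)  # would feed the decoder
```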
8. arXiv:2212.08153 (https://arxiv.org/abs/2212.08153) [pdf, other]
   Categories: cs.CL (Computation and Language); cs.AI (Artificial Intelligence); cs.LG (Machine Learning)
   FiDO: Fusion-in-Decoder optimized for stronger performance and faster inference
   Authors: Michiel de Jong, Yury Zemlyanskiy, Joshua Ainslie, Nicholas FitzGerald, Sumit Sanghai, Fei Sha, William Cohen
   Abstract: Fusion-in-Decoder (FiD) is a powerful retrieval-augmented language model that sets the state-of-the-art on many knowledge-intensive NLP tasks. However, the architecture used for FiD was chosen by making minimal modifications to a standard T5 model, which our analysis shows to be highly suboptimal for a retrieval-augmented model. In particular, FiD allocates the bulk of FLOPs to the encoder, while the majority of inference time results from memory bandwidth constraints in the decoder. We propose two simple changes to the FiD architecture to alleviate memory bandwidth constraints, and speed up inference by 7x. This allows us to use a much larger decoder at modest cost. We denote FiD with the above modifications as FiDO, and show that it strongly improves performance over existing FiD models for a wide range of inference budgets. For example, FiDO-Large-XXL performs faster inference than FiD-Base and achieves better performance than FiD-Large.
   Submitted 2 June, 2023; v1 submitted 15 December, 2022; originally announced December 2022.
   Comments: ACL Findings 2023.
9. arXiv:2209.14899 (https://arxiv.org/abs/2209.14899) [pdf, other]
   Categories: cs.CL (Computation and Language)
   Generate-and-Retrieve: use your predictions to improve retrieval for semantic parsing
   Authors: Yury Zemlyanskiy, Michiel de Jong, Joshua Ainslie, Panupong Pasupat, Peter Shaw, Linlu Qiu, Sumit Sanghai, Fei Sha
   Abstract: A common recent approach to semantic parsing augments sequence-to-sequence models by retrieving and appending a set of training samples, called exemplars. The effectiveness of this recipe is limited by the ability to retrieve informative exemplars that help produce the correct parse, which is especially challenging in low-resource settings. Existing retrieval is commonly based on similarity of query and exemplar inputs. We propose GandR, a retrieval procedure that retrieves exemplars for which outputs are also similar. GandR first generates a preliminary prediction with input-based retrieval. Then, it retrieves exemplars with outputs similar to the preliminary prediction, which are used to generate a final prediction. GandR sets the state of the art on multiple low-resource semantic parsing tasks.
   Submitted 29 September, 2022; originally announced September 2022.
   Comments: To appear in the proceedings of COLING 2022.
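   The two-stage procedure is concrete enough to sketch directly: retrieve by input similarity, generate a draft, then re-retrieve by output similarity and generate again. The embedding and generation functions below are stand-ins, not the paper's models:

```python
# Generate-and-Retrieve (GandR) loop with placeholder components:
# dot-product retrieval over embedded exemplars and a dummy generator.
import numpy as np

rng = np.random.default_rng(0)
exemplar_inputs = rng.normal(size=(500, 64))   # embedded training inputs
exemplar_outputs = rng.normal(size=(500, 64))  # embedded training outputs

def embed(vec):                                # stand-in embedding fn
    return vec

def retrieve(query_vec, table, k=5):
    return np.argsort(table @ query_vec)[-k:]  # top-k by dot product

def generate(query_vec, exemplar_ids):         # stand-in seq2seq model
    return exemplar_outputs[exemplar_ids].mean(axis=0)

query = rng.normal(size=(64,))
first_ids = retrieve(embed(query), exemplar_inputs)    # input-based retrieval
draft = generate(query, first_ids)                     # preliminary prediction
second_ids = retrieve(embed(draft), exemplar_outputs)  # output-based retrieval
final = generate(query, second_ids)                    # final prediction
```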
10. arXiv:2207.00630 (https://arxiv.org/abs/2207.00630) [pdf, other]
    Categories: cs.AI (Artificial Intelligence)
    QA Is the New KR: Question-Answer Pairs as Knowledge Bases
    Authors: Wenhu Chen, William W. Cohen, Michiel De Jong, Nitish Gupta, Alessandro Presta, Pat Verga, John Wieting
    Abstract: In this position paper, we propose a new approach to generating a type of knowledge base (KB) from text, based on question generation and entity linking. We argue that the proposed type of KB has many of the key advantages of a traditional symbolic KB: in particular, it consists of small modular components, which can be combined compositionally to answer complex queries, including relational queries and queries involving "multi-hop" inferences. However, unlike a traditional KB, this information store is well-aligned with common user information needs.
    Submitted 1 July, 2022; originally announced July 2022.
11. arXiv:2204.04581 (https://arxiv.org/abs/2204.04581) [pdf, other]
    Categories: cs.CL (Computation and Language); cs.AI (Artificial Intelligence); cs.LG (Machine Learning)
    Augmenting Pre-trained Language Models with QA-Memory for Open-Domain Question Answering
    Authors: Wenhu Chen, Pat Verga, Michiel de Jong, John Wieting, William Cohen
    Abstract: Retrieval augmented language models have recently become the standard for knowledge intensive tasks. Rather than relying purely on latent semantics within the parameters of large neural models, these methods enlist a semi-parametric memory to encode an index of knowledge for the model to retrieve over. Most prior work has employed text passages as the unit of knowledge, which has high coverage at the cost of interpretability, controllability, and efficiency. The opposite properties arise in other methods which have instead relied on knowledge base (KB) facts. At the same time, more recent work has demonstrated the effectiveness of storing and retrieving from an index of Q-A pairs derived from text (Lewis et al., 2021). This approach yields a high coverage knowledge representation that maintains KB-like properties due to its representations being more atomic units of information. In this work we push this line of research further by proposing a question-answer augmented encoder-decoder model and accompanying pretraining strategy. This yields an end-to-end system that not only outperforms prior QA retrieval methods on single-hop QA tasks but also enables compositional reasoning, as demonstrated by strong performance on two multi-hop QA datasets. Together, these methods improve the ability to interpret and control the model while narrowing the performance gap with passage retrieval systems.
    Submitted 23 January, 2023; v1 submitted 9 April, 2022; originally announced April 2022.
    Comments: Accepted by EACL 2023.
12. arXiv:2110.06176 (https://arxiv.org/abs/2110.06176) [pdf, other]
    Categories: cs.CL (Computation and Language); cs.AI (Artificial Intelligence); cs.LG (Machine Learning)
    Mention Memory: incorporating textual knowledge into Transformers through entity mention attention
    Authors: Michiel de Jong, Yury Zemlyanskiy, Nicholas FitzGerald, Fei Sha, William Cohen
    Abstract: Natural language understanding tasks such as open-domain question answering often require retrieving and assimilating factual information from multiple sources. We propose to address this problem by integrating a semi-parametric representation of a large text corpus into a Transformer model as a source of factual knowledge. Specifically, our method represents knowledge with 'mention memory', a table of dense vector representations of every entity mention in a corpus. The proposed model - TOME - is a Transformer that accesses the information through internal memory layers in which each entity mention in the input passage attends to the mention memory. This approach enables synthesis of and reasoning over many disparate sources of information within a single Transformer model. In experiments using a memory of 150 million Wikipedia mentions, TOME achieves strong performance on several open-domain knowledge-intensive tasks, including the claim verification benchmarks HoVer and FEVER and several entity-based QA benchmarks. We also show that the model learns to attend to informative mentions without any direct supervision. Finally we demonstrate that the model can generalize to new unseen entities by updating the memory without retraining.
    Submitted 19 April, 2022; v1 submitted 12 October, 2021; originally announced October 2021.
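    A toy version of the memory-attention mechanism described here: each entity-mention vector in the input attends over a table of pre-computed mention encodings. The real model retrieves over roughly 150 million mentions with approximate nearest-neighbor search; this dense top-k version only illustrates the mechanism, with placeholder sizes:

```python
# Mention-memory attention sketch: sparse attention of input mention
# vectors over a (here, tiny and random) table of mention encodings.
import numpy as np

rng = np.random.default_rng(0)
mention_memory = rng.normal(size=(10_000, 64))  # stand-in for 150M mentions

def memory_attention(mention_queries, memory, top_k=32):
    out = np.empty_like(mention_queries)
    for i, q in enumerate(mention_queries):
        scores = memory @ q
        nn = np.argsort(scores)[-top_k:]        # attend to top-k entries only
        w = np.exp(scores[nn] - scores[nn].max())
        out[i] = (w / w.sum()) @ memory[nn]     # weighted sum of memory vectors
    return out

input_mentions = rng.normal(size=(8, 64))       # mention spans in the passage
retrieved_knowledge = memory_attention(input_mentions, mention_memory)
```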
arXiv:2108.04809 [cond-mat.mes-hall, cs.LG, physics.app-ph] https://arxiv.org/abs/2108.04809
DOI: 10.1002/adma.202106248
Spiderweb nanomechanical resonators via Bayesian optimization: inspired by nature and guided by machine learning
Authors: Dongil Shin, Andrea Cupertino, Matthijs H. J. de Jong, Peter G. Steeneken, Miguel A. Bessa, Richard A. Norte
Abstract: From ultra-sensitive detectors of fundamental forces to quantum networks and sensors, mechanical resonators are enabling next-generation technologies to operate in room temperature environments. Currently, silicon nitride nanoresonators stand as a leading microchip platform in these advances by allowing for mechanical resonators whose motion is remarkably isolated from ambient thermal noise. However, to date, human intuition has remained the driving force behind design processes. Here, inspired by nature and guided by machine learning, a spiderweb nanomechanical resonator is developed that exhibits vibration modes which are isolated from ambient thermal environments via a novel "torsional soft-clamping" mechanism discovered by the data-driven optimization algorithm.
This bio-inspired resonator is then fabricated, experimentally confirming a new paradigm in mechanics with quality factors above 1 billion in room temperature environments. In contrast to other state-of-the-art resonators, this milestone is achieved with a compact design which does not require sub-micron lithographic features or complex phononic bandgaps, making it significantly easier and cheaper to manufacture at large scales. Here we demonstrate the ability of machine learning to work in tandem with human intuition to augment creative possibilities and uncover new strategies in computing and nanotechnology.
Submitted 13 December, 2021; v1 submitted 10 August, 2021; originally announced August 2021.
Journal ref: Shin, D., Cupertino, A., de Jong, M. H. J., Steeneken, P. G., Bessa, M. A., Norte, R. A., Spiderweb Nanomechanical Resonators via Bayesian Optimization: Inspired by Nature and Guided by Machine Learning. Adv. Mater. 2021, 2106248
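The "guided by machine learning" part of this entry is a Bayesian-optimization loop. The sketch below shows the generic pattern only, under stated assumptions: simulate_quality_factor is a synthetic stand-in for the expensive physics simulation, the four design parameters are arbitrary, and the authors' actual pipeline and surrogate model may differ.

```python
import numpy as np
from scipy.stats import norm
from sklearn.gaussian_process import GaussianProcessRegressor

def simulate_quality_factor(x):
    # Hypothetical stand-in for the expensive resonator simulation;
    # x holds normalized geometry parameters, the output is the objective.
    return -np.sum((x - 0.3) ** 2)

rng = np.random.default_rng(1)
dim = 4                                    # number of design parameters (toy choice)
X = rng.uniform(size=(5, dim))             # a few initial random designs
y = np.array([simulate_quality_factor(x) for x in X])

for _ in range(20):
    gp = GaussianProcessRegressor(normalize_y=True).fit(X, y)  # surrogate model
    cand = rng.uniform(size=(256, dim))    # random candidate designs
    mu, sigma = gp.predict(cand, return_std=True)
    # Expected-improvement acquisition: trade off exploring uncertain designs
    # against exploiting designs the surrogate already predicts to be good.
    z = (mu - y.max()) / np.maximum(sigma, 1e-9)
    ei = (mu - y.max()) * norm.cdf(z) + sigma * norm.pdf(z)
    x_next = cand[np.argmax(ei)]           # most promising design to simulate next
    X = np.vstack([X, x_next])
    y = np.append(y, simulate_quality_factor(x_next))

print("best simulated objective:", y.max())
```

The point of the loop is sample efficiency: each expensive simulation is spent on the design the surrogate currently finds most promising, which is how such a search can reach beyond human design intuition.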
arXiv:2107.01979 [cs.LG, cs.CR, stat.AP] https://arxiv.org/abs/2107.01979
Machine Learning for Fraud Detection in E-Commerce: A Research Agenda
Authors: Niek Tax, Kees Jan de Vries, Mathijs de Jong, Nikoleta Dosoula, Bram van den Akker, Jon Smith, Olivier Thuong, Lucas Bernardi
Abstract: Fraud detection and prevention play an important part in ensuring the sustained operation of any e-commerce business. Machine learning (ML) often plays an important role in these anti-fraud operations, but the organizational context in which these ML models operate cannot be ignored. In this paper, we take an organization-centric view on the topic of fraud detection by formulating an operational model of the anti-fraud departments in e-commerce organizations. We derive 6 research topics and 12 practical challenges for fraud detection from this operational model. We summarize the state of the literature for each research topic, discuss potential solutions to the practical challenges, and identify 22 open research challenges.
Submitted 5 July, 2021; originally announced July 2021.
Comments: Accepted and to appear in the proceedings of the KDD 2021 co-located workshop: the 2nd International Workshop on Deployable Machine Learning for Security Defense (MLHat)
arXiv:2106.01607 [cs.LG, cs.CL, cs.CV] https://arxiv.org/abs/2106.01607
Grounding Complex Navigational Instructions Using Scene Graphs
Authors: Michiel de Jong, Satyapriya Krishna, Anuva Agarwal
Abstract: Training a reinforcement learning agent to carry out natural language instructions is limited by the available supervision, i.e. knowing when the instruction has been carried out. We adapt the CLEVR visual question answering dataset to generate complex natural language navigation instructions and accompanying scene graphs, yielding an environment-agnostic supervised dataset. To demonstrate the use of this data set, we map the scenes to the VizDoom environment and use the architecture in \citet{gatedattention} to train an agent to carry out these more complex language instructions.
Submitted 3 June, 2021; originally announced June 2021.
Comments: arXiv admin note: text overlap with arXiv:1706.07230 by other authors
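The "environment-agnostic supervision" in this entry comes from the scene graphs: they state when an instruction has been carried out without reference to any particular simulator. A hypothetical illustration follows; the triple encoding and the names in it are assumptions, not the dataset's actual format.

```python
# A scene graph as a set of (subject, relation, object) triples; an instruction
# counts as carried out once all of its goal triples hold in the current scene.
def instruction_satisfied(goal_triples, scene_graph):
    return goal_triples <= scene_graph

scene = {("agent", "next_to", "red_cube"), ("red_cube", "left_of", "sphere")}
goal = {("agent", "next_to", "red_cube")}   # e.g. "walk to the red cube"
print(instruction_satisfied(goal, scene))   # True
```

Because the check only consults the graph, the same supervision transfers when the scenes are mapped into VizDoom or any other environment.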
arXiv:2105.04241 [cs.CL, cs.LG] https://arxiv.org/abs/2105.04241
ReadTwice: Reading Very Large Documents with Memories
Authors: Yury Zemlyanskiy, Joshua Ainslie, Michiel de Jong, Philip Pham, Ilya Eckstein, Fei Sha
Abstract: Knowledge-intensive tasks such as question answering often require assimilating information from different sections of large inputs such as books or article collections. We propose ReadTwice, a simple and effective technique that combines several strengths of prior approaches to model long-range dependencies with Transformers. The main idea is to read text in small segments, in parallel, summarizing each segment into a memory table to be used in a second read of the text. We show that the method outperforms models of comparable size on several question answering (QA) datasets and sets a new state of the art on the challenging NarrativeQA task, with questions about entire books. Source code and pre-trained checkpoints for ReadTwice can be found at https://goo.gle/research-readtwice.
Submitted 11 May, 2021; v1 submitted 10 May, 2021; originally announced May 2021.
Comments: To appear in the proceedings of NAACL 2021
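The two-pass idea is compact enough to sketch. Everything below is a toy stand-in, assuming mean-pooled random embeddings in place of a Transformer encoder; it shows only the data flow (parallel first read, memory table, memory-conditioned second read), not the actual architecture.

```python
import numpy as np

rng = np.random.default_rng(0)
document = "a long document split into small segments for two reads".split()
segments = [document[i:i + 3] for i in range(0, len(document), 3)]
embedding = {t: rng.normal(size=8) for t in document}  # toy token embeddings

def encode(tokens, memory=None):
    # Stand-in for a Transformer segment encoder: mean of token embeddings,
    # optionally mixed with an attention-weighted summary of the memory table.
    h = np.mean([embedding[t] for t in tokens], axis=0)
    if memory is not None:
        attn = np.exp(memory @ h)
        attn /= attn.sum()          # attention over all segment summaries
        h = h + attn @ memory       # long-range info crosses segment boundaries
    return h

# First read: segments are encoded independently (hence in parallel), and each
# is summarized into one row of a memory table.
memory = np.stack([encode(seg) for seg in segments])
# Second read: each segment is re-encoded with access to the whole table.
second_pass = [encode(seg, memory) for seg in segments]
print(memory.shape, len(second_pass))
```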
arXiv:1907.02597 [cs.MS, cs.PL] https://arxiv.org/abs/1907.02597
Multi-dimensional interpolations in C++
Authors: Maarten de Jong
Abstract: A C++ software design is presented that can be used to interpolate data in any number of dimensions. The design is based on a combination of templates of functional collections of elements and so-called type lists. The design allows for different search methodologies and interpolation techniques in each dimension. It is also possible to expand and reduce the number of dimensions, to interpolate composite data types and to produce on-the-fly additional values such as derivatives of the interpolating function.
Submitted 3 July, 2019; originally announced July 2019.
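Although the design in this entry is a C++ template library, the dimension-by-dimension recursion at its heart can be sketched in a few lines of Python. This shows multilinear interpolation only; the paper's design also allows swapping in other search and interpolation policies per dimension.

```python
import numpy as np

def interpolate(axes, values, point):
    """Recursive multilinear interpolation in any number of dimensions.

    axes:   list of sorted 1-D coordinate arrays, one per dimension
    values: n-D array of samples on the grid spanned by `axes`
    point:  coordinates at which to interpolate
    """
    if not axes:                     # 0-D base case: a single grid sample
        return values
    grid, x = axes[0], point[0]
    i = int(np.clip(np.searchsorted(grid, x) - 1, 0, len(grid) - 2))
    t = (x - grid[i]) / (grid[i + 1] - grid[i])
    lo = interpolate(axes[1:], values[i], point[1:])      # recurse on the rest
    hi = interpolate(axes[1:], values[i + 1], point[1:])
    return (1 - t) * lo + t * hi

xs, ys = np.linspace(0, 1, 5), np.linspace(0, 2, 9)
f = np.add.outer(xs ** 2, ys)        # samples of f(x, y) = x^2 + y
print(interpolate([xs, ys], f, [0.4, 1.3]))  # close to 0.4**2 + 1.3 = 1.46
```

Each recursion step strips one coordinate and one array axis, which is roughly the role the type lists play, at compile time, in the C++ design.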
arXiv:1906.06805 [cs.LG, stat.ML] https://arxiv.org/abs/1906.06805
Neural Theorem Provers Do Not Learn Rules Without Exploration
Authors: Michiel de Jong, Fei Sha
Abstract: Neural symbolic processing aims to combine the generalization of logical learning approaches and the performance of neural networks. The Neural Theorem Proving (NTP) model by Rocktaschel et al (2017) learns embeddings for concepts and performs logical unification. While NTP is promising and effective in predicting facts accurately, we have little knowledge of how well it can extract true relationships among data. To this end, we create synthetic logical datasets with injected relationships, which can be generated on the fly, to test neural-based relation learning algorithms including NTP. We show that it has difficulty recovering relationships in all but the simplest settings. Critical analysis and diagnostic experiments suggest that the optimization algorithm suffers from poor local minima due to its greedy winner-takes-all strategy in identifying the most informative structure (proof path) to pursue. We alter the NTP algorithm to increase exploration, which sharply improves performance. We argue and demonstrate that it is insightful to benchmark with synthetic data with ground-truth relationships, for both evaluating models and revealing algorithmic issues.
Submitted 16 June, 2019; originally announced June 2019.
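The failure mode described here, and one possible remedy, fit in a toy snippet. This is an illustration of the general idea only, with made-up scores and an epsilon-greedy scheme standing in for whatever exploration strategy the paper actually adopts.

```python
import numpy as np

rng = np.random.default_rng(0)
proof_scores = np.array([0.9, 0.85, 0.1])   # toy unification scores of proof paths

def winner_takes_all(scores):
    # Greedy choice: always pursue the single best-scoring proof path, so a
    # near-tied alternative (0.85 here) never receives any learning signal.
    return int(np.argmax(scores))

def explore(scores, eps=0.3):
    # Epsilon-greedy stand-in for increased exploration: occasionally pursue
    # a random proof path so other structures can be evaluated and learned.
    if rng.random() < eps:
        return int(rng.integers(len(scores)))
    return int(np.argmax(scores))

picks = [explore(proof_scores) for _ in range(1000)]
print(np.bincount(picks, minlength=3) / 1000)  # probability mass over the 3 paths
```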
arXiv:1812.02253 [cs.CL] https://arxiv.org/abs/1812.02253
Weighted Global Normalization for Multiple Choice Reading Comprehension over Long Documents
Authors: Aditi Chaudhary, Bhargavi Paranjape, Michiel de Jong
Abstract: Motivated by recent evidence pointing out the fragility of high-performing span prediction models, we direct our attention to multiple choice reading comprehension. In particular, this work introduces a novel method for improving answer selection on long documents through weighted global normalization of predictions over portions of the documents. We show that applying our method to a span prediction model adapted for answer selection helps model performance on long summaries from NarrativeQA, a challenging reading comprehension dataset with an answer selection task, and we strongly improve on the task baseline performance by +36.2 Mean Reciprocal Rank.
Submitted 25 November, 2021; v1 submitted 5 December, 2018; originally announced December 2018.
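One plausible reading of "weighted global normalization" can be sketched as follows; the exact formulation is in the paper, and the chunk weights and scores below are made up for illustration.

```python
import numpy as np

def weighted_global_normalization(chunk_weights, candidate_scores):
    """Score answer candidates globally across portions of a long document.

    chunk_weights:    (num_chunks,)                 relevance of each portion
    candidate_scores: (num_chunks, num_candidates)  per-portion candidate scores

    Rather than a softmax inside each portion, take one softmax over all
    (portion, candidate) pairs, weighting portions by their relevance, then
    marginalize out the portion to score each answer candidate.
    """
    logits = candidate_scores + chunk_weights[:, None]
    flat = logits.ravel()
    p = np.exp(flat - flat.max())
    p /= p.sum()                                       # one global softmax
    return p.reshape(candidate_scores.shape).sum(axis=0)

w = np.array([2.0, 0.5])                 # portion 0 looks more relevant
s = np.array([[1.0, 0.2, 0.1],
              [0.3, 1.5, 0.2]])          # scores for 3 answer choices per portion
print(weighted_global_normalization(w, s))  # distribution over the 3 choices
```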
arXiv:1805.07885 [cs.CY] https://arxiv.org/abs/1805.07885
DOI: 10.3390/en11051277
The Governance of Risks in Ridesharing: A Revelatory Case from Singapore
Authors: Yanwei Li, Araz Taeihagh, Martin de Jong
Abstract: Recently we have witnessed the worldwide adoption of many different types of innovative technologies, such as crowdsourcing, ridesharing, open and big data, aiming at delivering public services more efficiently and effectively. Among them, ridesharing has received substantial attention from decision-makers around the world. Because of the multitude of currently understood or potentially unknown risks associated with ridesharing (unemployment, insurance, information privacy, and environmental risk), governments in different countries apply different strategies to address such risks. Some governments prohibit the adoption of ridesharing altogether, while other governments promote it. In this article, we address the question of how risks involved in ridesharing are governed over time. We present an in-depth single case study on Singapore and examine how the Singaporean government has addressed risks in ridesharing over time. The Singaporean government has a strong ambition to become an innovation hub, and many innovative technologies have been adopted and promoted to that end. At the same time, decision-makers in Singapore are reputed for their proactive style of social governance. The example of Singapore can be regarded as a revelatory case study, helping us further to explore governance practices in other countries.
Keywords: risk; ridesharing; transport; governance; innovative technologies; case study; Singapore
Submitted 21 May, 2018; originally announced May 2018.
Journal ref: Energies 11, no. 5: 1277 (2018)

arXiv:1708.06293 [cs.OH] https://arxiv.org/abs/1708.06293
Neville's algorithm revisited
Authors: M. de Jong
Abstract: Neville's algorithm is known to provide an efficient and numerically stable solution for polynomial interpolations. In this paper, an extension of this algorithm is presented which includes the derivatives of the interpolating polynomial.
Submitted 16 August, 2017; originally announced August 2017.
Comments: 3 pages
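The abstract above is short, but the extension it describes is concrete enough to sketch: differentiate Neville's recurrence and propagate the derivative alongside the value. Below is a minimal version for the first derivative only (the paper develops the general case); the code is an illustration, not the paper's implementation.

```python
def neville(xs, ys, x):
    """Neville's algorithm returning P(x) and P'(x) for the interpolating
    polynomial P through the points (xs[i], ys[i])."""
    n = len(xs)
    p = [float(v) for v in ys]   # p[i]: value at x of the stage-k polynomial
    d = [0.0] * n                # d[i]: its derivative (0 for the constants)
    for k in range(1, n):
        for i in range(n - k):
            denom = xs[i + k] - xs[i]
            num = (x - xs[i]) * p[i + 1] - (x - xs[i + k]) * p[i]
            # The product rule applied to the same recurrence gives the derivative.
            dnum = p[i + 1] + (x - xs[i]) * d[i + 1] \
                 - p[i] - (x - xs[i + k]) * d[i]
            p[i], d[i] = num / denom, dnum / denom
    return p[0], d[0]

xs = [0.0, 1.0, 2.0, 3.0]
ys = [v ** 3 for v in xs]        # samples of f(v) = v^3
print(neville(xs, ys, 1.5))      # (3.375, 6.75) = (f(1.5), f'(1.5))
```

Because the derivative recurrence reuses the intermediate values of the standard recurrence, the extra cost is only a constant factor, preserving the efficiency the abstract highlights.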