CINXE.COM

Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 93 results for author: <span class="mathjax">Cohen, W</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Cohen%2C+W">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Cohen, W"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Cohen%2C+W&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Cohen, W"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Cohen%2C+W&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Cohen%2C+W&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Cohen%2C+W&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18777">arXiv:2410.18777</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.18777">pdf</a>, <a href="https://arxiv.org/format/2410.18777">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Online path planning for kinematic-constrained UAVs in a dynamic environment based on a Differential Evolution algorithm </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Freitas%2C+E+J+R">Elias J. R. Freitas</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+M+W">Miri Weiss Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Guimar%C3%A3es%2C+F+G">Frederico G. Guimar茫es</a>, <a href="/search/cs?searchtype=author&amp;query=Pimenta%2C+L+C+A">Luciano C. A. Pimenta</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18777v1-abstract-short" style="display: inline;"> This research presents an online path planner for Unmanned Aerial Vehicles (UAVs) that can handle dynamic obstacles and UAV motion constraints, including maximum curvature and desired orientations. Our proposed planner uses a NURBS path representation and a Differential Evolution algorithm, incorporating concepts from the Velocity Obstacle approach in a constraint function. Initial results show th&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18777v1-abstract-full').style.display = 'inline'; document.getElementById('2410.18777v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18777v1-abstract-full" style="display: none;"> This research presents an online path planner for Unmanned Aerial Vehicles (UAVs) that can handle dynamic obstacles and UAV motion constraints, including maximum curvature and desired orientations. Our proposed planner uses a NURBS path representation and a Differential Evolution algorithm, incorporating concepts from the Velocity Obstacle approach in a constraint function. Initial results show that our approach is feasible and provides a foundation for future extensions to three-dimensional (3D) environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18777v1-abstract-full').style.display = 'none'; document.getElementById('2410.18777v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to the 40th Anniversary of the IEEE Conference on Robotics and Automation (ICRA@40)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.15359">arXiv:2409.15359</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.15359">pdf</a>, <a href="https://arxiv.org/format/2409.15359">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Watch Your Steps: Observable and Modular Chains of Thought </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+C+A">Cassandra A. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.15359v2-abstract-short" style="display: inline;"> We propose a variant of chain of thought (CoT) prompting called Program Trace Prompting that makes explanations more observable while preserving the power, generality and flexibility of CoT. In our approach, few-shot CoT demonstrations are wrapped in a formal syntax based on Python, and each prompt: identifies and names steps; defines the input/output behavior of steps; and replaces CoT explanatio&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15359v2-abstract-full').style.display = 'inline'; document.getElementById('2409.15359v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.15359v2-abstract-full" style="display: none;"> We propose a variant of chain of thought (CoT) prompting called Program Trace Prompting that makes explanations more observable while preserving the power, generality and flexibility of CoT. In our approach, few-shot CoT demonstrations are wrapped in a formal syntax based on Python, and each prompt: identifies and names steps; defines the input/output behavior of steps; and replaces CoT explanations of in-context examples with chains of these formalized steps on the same examples. Program Trace Prompting is applicable to many tasks, achieving strong results on the 23 diverse tasks in the BIG-Bench Hard benchmark. More importantly, by instrumenting explanations in this way, we enable new types of analysis. In particular, we identify &#34;non-local errors&#34; (which correspond to incorrectly learning the reasoning method illustrated in the demonstrations) as an unaddressed issue in CoT learning, and we present methods for verifying the modularity of steps in a CoT explanation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15359v2-abstract-full').style.display = 'none'; document.getElementById('2409.15359v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.14596">arXiv:2406.14596</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.14596">pdf</a>, <a href="https://arxiv.org/format/2406.14596">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> VLM Agents Generate Their Own Memories: Distilling Experience into Embodied Programs of Thought </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Sarch%2C+G">Gabriel Sarch</a>, <a href="/search/cs?searchtype=author&amp;query=Jang%2C+L">Lawrence Jang</a>, <a href="/search/cs?searchtype=author&amp;query=Tarr%2C+M+J">Michael J. Tarr</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Marino%2C+K">Kenneth Marino</a>, <a href="/search/cs?searchtype=author&amp;query=Fragkiadaki%2C+K">Katerina Fragkiadaki</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.14596v4-abstract-short" style="display: inline;"> Large-scale generative language and vision-language models (LLMs and VLMs) excel in few-shot in-context learning for decision making and instruction following. However, they require high-quality exemplar demonstrations in their context window. In this work, we ask: Can LLMs and VLMs generate their own examples from generic, sub-optimal demonstrations? We propose In-Context Abstraction Learning (IC&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.14596v4-abstract-full').style.display = 'inline'; document.getElementById('2406.14596v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.14596v4-abstract-full" style="display: none;"> Large-scale generative language and vision-language models (LLMs and VLMs) excel in few-shot in-context learning for decision making and instruction following. However, they require high-quality exemplar demonstrations in their context window. In this work, we ask: Can LLMs and VLMs generate their own examples from generic, sub-optimal demonstrations? We propose In-Context Abstraction Learning (ICAL), a method that builds a memory of multimodal experience from sub-optimal demonstrations and human feedback. Given a task demonstration that may contain inefficiencies or mistakes, a VLM abstracts the trajectory into a generalized program of thoughts by correcting inefficient actions and annotating cognitive abstractions: causal relationships, object state changes, temporal subgoals, and task-relevant visual elements. These programs of thought are iteratively improved through human feedback while the agent executes the trajectory in a similar environment. The resulting examples significantly improve decision-making in retrieval-augmented LLM and VLM agents. Moreover, as the agent&#39;s library of examples grows, it becomes more efficient, relying less on human feedback and requiring fewer environment interactions per demonstration. Our ICAL agent surpasses the SOTA in dialogue-based instruction following in TEACh, multimodal web agents in VisualWebArena, and action anticipation in Ego4D. In TEACh, we achieve a 12.6% improvement in goal-condition success. In VisualWebArena, our task success rate improves over few-shot GPT4V. In Ego4D action forecasting, we improve over few-shot GPT-4V and remain competitive with supervised models. We show finetuning our retrieval-augmented in-context agent yields additional improvements. Our approach significantly reduces reliance on manual prompt engineering and consistently outperforms in-context learning from action plans that lack such programs of thought. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.14596v4-abstract-full').style.display = 'none'; document.getElementById('2406.14596v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project website: http://ical-learning.github.io/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.04291">arXiv:2406.04291</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.04291">pdf</a>, <a href="https://arxiv.org/format/2406.04291">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Stratified Prediction-Powered Inference for Hybrid Language Model Evaluation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Fisch%2C+A">Adam Fisch</a>, <a href="/search/cs?searchtype=author&amp;query=Maynez%2C+J">Joshua Maynez</a>, <a href="/search/cs?searchtype=author&amp;query=Hofer%2C+R+A">R. Alex Hofer</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Globerson%2C+A">Amir Globerson</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.04291v1-abstract-short" style="display: inline;"> Prediction-powered inference (PPI) is a method that improves statistical estimates based on limited human-labeled data. PPI achieves this by combining small amounts of human-labeled data with larger amounts of data labeled by a reasonably accurate -- but potentially biased -- automatic system, in a way that results in tighter confidence intervals for certain parameters of interest (e.g., the mean&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.04291v1-abstract-full').style.display = 'inline'; document.getElementById('2406.04291v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.04291v1-abstract-full" style="display: none;"> Prediction-powered inference (PPI) is a method that improves statistical estimates based on limited human-labeled data. PPI achieves this by combining small amounts of human-labeled data with larger amounts of data labeled by a reasonably accurate -- but potentially biased -- automatic system, in a way that results in tighter confidence intervals for certain parameters of interest (e.g., the mean performance of a language model). In this paper, we propose a method called Stratified Prediction-Powered Inference (StratPPI), in which we show that the basic PPI estimates can be considerably improved by employing simple data stratification strategies. Without making any assumptions on the underlying automatic labeling system or data distribution, we derive an algorithm for computing provably valid confidence intervals for population parameters (such as averages) that is based on stratified sampling. In particular, we show both theoretically and empirically that, with appropriate choices of stratification and sample allocation, our approach can provide substantially tighter confidence intervals than unstratified approaches. Specifically, StratPPI is expected to improve in cases where the performance of the autorater varies across different conditional distributions of the target data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.04291v1-abstract-full').style.display = 'none'; document.getElementById('2406.04291v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.06034">arXiv:2405.06034</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.06034">pdf</a>, <a href="https://arxiv.org/format/2405.06034">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Bayesian Prediction-Powered Inference </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Hofer%2C+R+A">R. Alex Hofer</a>, <a href="/search/cs?searchtype=author&amp;query=Maynez%2C+J">Joshua Maynez</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Fisch%2C+A">Adam Fisch</a>, <a href="/search/cs?searchtype=author&amp;query=Globerson%2C+A">Amir Globerson</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.06034v1-abstract-short" style="display: inline;"> Prediction-powered inference (PPI) is a method that improves statistical estimates based on limited human-labeled data. Specifically, PPI methods provide tighter confidence intervals by combining small amounts of human-labeled data with larger amounts of data labeled by a reasonably accurate, but potentially biased, automatic system. We propose a framework for PPI based on Bayesian inference that&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.06034v1-abstract-full').style.display = 'inline'; document.getElementById('2405.06034v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.06034v1-abstract-full" style="display: none;"> Prediction-powered inference (PPI) is a method that improves statistical estimates based on limited human-labeled data. Specifically, PPI methods provide tighter confidence intervals by combining small amounts of human-labeled data with larger amounts of data labeled by a reasonably accurate, but potentially biased, automatic system. We propose a framework for PPI based on Bayesian inference that allows researchers to develop new task-appropriate PPI methods easily. Exploiting the ease with which we can design new metrics, we propose improved PPI methods for several importantcases, such as autoraters that give discrete responses (e.g., prompted LLM ``judges&#39;&#39;) and autoraters with scores that have a non-linear relationship to human scores. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.06034v1-abstract-full').style.display = 'none'; document.getElementById('2405.06034v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.01952">arXiv:2401.01952</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.01952">pdf</a>, <a href="https://arxiv.org/format/2401.01952">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Instruct-Imagen: Image Generation with Multi-modal Instruction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Hu%2C+H">Hexiang Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Chan%2C+K+C+K">Kelvin C. K. Chan</a>, <a href="/search/cs?searchtype=author&amp;query=Su%2C+Y">Yu-Chuan Su</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+W">Wenhu Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yandong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Sohn%2C+K">Kihyuk Sohn</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+Y">Yang Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Ben%2C+X">Xue Ben</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+B">Boqing Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W">William Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Chang%2C+M">Ming-Wei Chang</a>, <a href="/search/cs?searchtype=author&amp;query=Jia%2C+X">Xuhui Jia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.01952v1-abstract-short" style="display: inline;"> This paper presents instruct-imagen, a model that tackles heterogeneous image generation tasks and generalizes across unseen tasks. We introduce *multi-modal instruction* for image generation, a task representation articulating a range of generation intents with precision. It uses natural language to amalgamate disparate modalities (e.g., text, edge, style, subject, etc.), such that abundant gener&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.01952v1-abstract-full').style.display = 'inline'; document.getElementById('2401.01952v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.01952v1-abstract-full" style="display: none;"> This paper presents instruct-imagen, a model that tackles heterogeneous image generation tasks and generalizes across unseen tasks. We introduce *multi-modal instruction* for image generation, a task representation articulating a range of generation intents with precision. It uses natural language to amalgamate disparate modalities (e.g., text, edge, style, subject, etc.), such that abundant generation intents can be standardized in a uniform format. We then build instruct-imagen by fine-tuning a pre-trained text-to-image diffusion model with a two-stage framework. First, we adapt the model using the retrieval-augmented training, to enhance model&#39;s capabilities to ground its generation on external multimodal context. Subsequently, we fine-tune the adapted model on diverse image generation tasks that requires vision-language understanding (e.g., subject-driven generation, etc.), each paired with a multi-modal instruction encapsulating the task&#39;s essence. Human evaluation on various image generation datasets reveals that instruct-imagen matches or surpasses prior task-specific models in-domain and demonstrates promising generalization to unseen and more complex tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.01952v1-abstract-full').style.display = 'none'; document.getElementById('2401.01952v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages, 18 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.10083">arXiv:2311.10083</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.10083">pdf</a>, <a href="https://arxiv.org/ps/2311.10083">ps</a>, <a href="https://arxiv.org/format/2311.10083">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Characterizing Tradeoffs in Language Model Decoding with Informational Interpretations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chang%2C+C">Chung-Ching Chang</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Sung%2C+Y">Yun-Hsuan Sung</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.10083v1-abstract-short" style="display: inline;"> We propose a theoretical framework for formulating language model decoder algorithms with dynamic programming and information theory. With dynamic programming, we lift the design of decoder algorithms from the logit space to the action-state value function space, and show that the decoding algorithms are consequences of optimizing the action-state value functions. Each component in the action-stat&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.10083v1-abstract-full').style.display = 'inline'; document.getElementById('2311.10083v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.10083v1-abstract-full" style="display: none;"> We propose a theoretical framework for formulating language model decoder algorithms with dynamic programming and information theory. With dynamic programming, we lift the design of decoder algorithms from the logit space to the action-state value function space, and show that the decoding algorithms are consequences of optimizing the action-state value functions. Each component in the action-state value function space has an information theoretical interpretation. With the lifting and interpretation, it becomes evident what the decoder algorithm is optimized for, and hence facilitating the arbitration of the tradeoffs in sensibleness, diversity, and attribution. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.10083v1-abstract-full').style.display = 'none'; document.getElementById('2311.10083v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.06697">arXiv:2311.06697</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.06697">pdf</a>, <a href="https://arxiv.org/format/2311.06697">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Trusted Source Alignment in Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Bashlovkina%2C+V">Vasilisa Bashlovkina</a>, <a href="/search/cs?searchtype=author&amp;query=Kuang%2C+Z">Zhaobin Kuang</a>, <a href="/search/cs?searchtype=author&amp;query=Matthews%2C+R">Riley Matthews</a>, <a href="/search/cs?searchtype=author&amp;query=Clifford%2C+E">Edward Clifford</a>, <a href="/search/cs?searchtype=author&amp;query=Jun%2C+Y">Yennie Jun</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Baumgartner%2C+S">Simon Baumgartner</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.06697v1-abstract-short" style="display: inline;"> Large language models (LLMs) are trained on web-scale corpora that inevitably include contradictory factual information from sources of varying reliability. In this paper, we propose measuring an LLM property called trusted source alignment (TSA): the model&#39;s propensity to align with content produced by trusted publishers in the face of uncertainty or controversy. We present FactCheckQA, a TSA eva&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.06697v1-abstract-full').style.display = 'inline'; document.getElementById('2311.06697v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.06697v1-abstract-full" style="display: none;"> Large language models (LLMs) are trained on web-scale corpora that inevitably include contradictory factual information from sources of varying reliability. In this paper, we propose measuring an LLM property called trusted source alignment (TSA): the model&#39;s propensity to align with content produced by trusted publishers in the face of uncertainty or controversy. We present FactCheckQA, a TSA evaluation dataset based on a corpus of fact checking articles. We describe a simple protocol for evaluating TSA and offer a detailed analysis of design considerations including response extraction, claim contextualization, and bias in prompt formulation. Applying the protocol to PaLM-2, we find that as we scale up the model size, the model performance on FactCheckQA improves from near-random to up to 80% balanced accuracy in aligning with trusted sources. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.06697v1-abstract-full').style.display = 'none'; document.getElementById('2311.06697v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.04886">arXiv:2311.04886</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.04886">pdf</a>, <a href="https://arxiv.org/format/2311.04886">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> SEMQA: Semi-Extractive Multi-Source Question Answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Schuster%2C+T">Tal Schuster</a>, <a href="/search/cs?searchtype=author&amp;query=Lelkes%2C+A+D">Adam D. Lelkes</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+H">Haitian Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Gupta%2C+J">Jai Gupta</a>, <a href="/search/cs?searchtype=author&amp;query=Berant%2C+J">Jonathan Berant</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Metzler%2C+D">Donald Metzler</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.04886v2-abstract-short" style="display: inline;"> Recently proposed long-form question answering (QA) systems, supported by large language models (LLMs), have shown promising capabilities. Yet, attributing and verifying their generated abstractive answers can be difficult, and automatically evaluating their accuracy remains an ongoing challenge. In this work, we introduce a new QA task for answering multi-answer questions by summarizing multipl&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.04886v2-abstract-full').style.display = 'inline'; document.getElementById('2311.04886v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.04886v2-abstract-full" style="display: none;"> Recently proposed long-form question answering (QA) systems, supported by large language models (LLMs), have shown promising capabilities. Yet, attributing and verifying their generated abstractive answers can be difficult, and automatically evaluating their accuracy remains an ongoing challenge. In this work, we introduce a new QA task for answering multi-answer questions by summarizing multiple diverse sources in a semi-extractive fashion. Specifically, Semi-extractive Multi-source QA (SEMQA) requires models to output a comprehensive answer, while mixing factual quoted spans -- copied verbatim from given input sources -- and non-factual free-text connectors that glue these spans together into a single cohesive passage. This setting bridges the gap between the outputs of well-grounded but constrained extractive QA systems and more fluent but harder to attribute fully abstractive answers. Particularly, it enables a new mode for language models that leverages their advanced language generation capabilities, while also producing fine in-line attributions by-design that are easy to verify, interpret, and evaluate. To study this task, we create the first dataset of this kind, QuoteSum, with human-written semi-extractive answers to natural and generated questions, and define text-based evaluation metrics. Experimenting with several LLMs in various settings, we find this task to be surprisingly challenging, demonstrating the importance of QuoteSum for developing and studying such consolidation capabilities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.04886v2-abstract-full').style.display = 'none'; document.getElementById('2311.04886v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NAACL 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.14903">arXiv:2308.14903</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2308.14903">pdf</a>, <a href="https://arxiv.org/format/2308.14903">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> MEMORY-VQ: Compression for Tractable Internet-Scale Memory </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zemlyanskiy%2C+Y">Yury Zemlyanskiy</a>, <a href="/search/cs?searchtype=author&amp;query=de+Jong%2C+M">Michiel de Jong</a>, <a href="/search/cs?searchtype=author&amp;query=Vilnis%2C+L">Luke Vilnis</a>, <a href="/search/cs?searchtype=author&amp;query=Onta%C3%B1%C3%B3n%2C+S">Santiago Onta帽贸n</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Sanghai%2C+S">Sumit Sanghai</a>, <a href="/search/cs?searchtype=author&amp;query=Ainslie%2C+J">Joshua Ainslie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.14903v1-abstract-short" style="display: inline;"> Retrieval augmentation is a powerful but expensive method to make language models more knowledgeable about the world. Memory-based methods like LUMEN pre-compute token representations for retrieved passages to drastically speed up inference. However, memory also leads to much greater storage requirements from storing pre-computed representations. We propose MEMORY-VQ, a new method to reduce stor&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.14903v1-abstract-full').style.display = 'inline'; document.getElementById('2308.14903v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.14903v1-abstract-full" style="display: none;"> Retrieval augmentation is a powerful but expensive method to make language models more knowledgeable about the world. Memory-based methods like LUMEN pre-compute token representations for retrieved passages to drastically speed up inference. However, memory also leads to much greater storage requirements from storing pre-computed representations. We propose MEMORY-VQ, a new method to reduce storage requirements of memory-augmented models without sacrificing performance. Our method uses a vector quantization variational autoencoder (VQ-VAE) to compress token representations. We apply MEMORY-VQ to the LUMEN model to obtain LUMEN-VQ, a memory model that achieves a 16x compression rate with comparable performance on the KILT benchmark. LUMEN-VQ enables practical retrieval augmentation even for extremely large retrieval corpora. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.14903v1-abstract-full').style.display = 'none'; document.getElementById('2308.14903v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.08661">arXiv:2308.08661</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2308.08661">pdf</a>, <a href="https://arxiv.org/format/2308.08661">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Answering Ambiguous Questions with a Database of Questions, Answers, and Revisions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Sun%2C+H">Haitian Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Salakhutdinov%2C+R">Ruslan Salakhutdinov</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.08661v1-abstract-short" style="display: inline;"> Many open-domain questions are under-specified and thus have multiple possible answers, each of which is correct under a different interpretation of the question. Answering such ambiguous questions is challenging, as it requires retrieving and then reasoning about diverse information from multiple passages. We present a new state-of-the-art for answering ambiguous questions that exploits a databas&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.08661v1-abstract-full').style.display = 'inline'; document.getElementById('2308.08661v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.08661v1-abstract-full" style="display: none;"> Many open-domain questions are under-specified and thus have multiple possible answers, each of which is correct under a different interpretation of the question. Answering such ambiguous questions is challenging, as it requires retrieving and then reasoning about diverse information from multiple passages. We present a new state-of-the-art for answering ambiguous questions that exploits a database of unambiguous questions generated from Wikipedia. On the challenging ASQA benchmark, which requires generating long-form answers that summarize the multiple answers to an ambiguous question, our method improves performance by 15% (relative improvement) on recall measures and 10% on measures which evaluate disambiguating questions from predicted outputs. Retrieving from the database of generated questions also gives large improvements in diverse passage retrieval (by matching user questions q to passages p indirectly, via questions q&#39; generated from p). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.08661v1-abstract-full').style.display = 'none'; document.getElementById('2308.08661v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.10231">arXiv:2306.10231</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.10231">pdf</a>, <a href="https://arxiv.org/format/2306.10231">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> GLIMMER: generalized late-interaction memory reranker </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=de+Jong%2C+M">Michiel de Jong</a>, <a href="/search/cs?searchtype=author&amp;query=Zemlyanskiy%2C+Y">Yury Zemlyanskiy</a>, <a href="/search/cs?searchtype=author&amp;query=FitzGerald%2C+N">Nicholas FitzGerald</a>, <a href="/search/cs?searchtype=author&amp;query=Sanghai%2C+S">Sumit Sanghai</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Ainslie%2C+J">Joshua Ainslie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.10231v1-abstract-short" style="display: inline;"> Memory-augmentation is a powerful approach for efficiently incorporating external information into language models, but leads to reduced performance relative to retrieving text. Recent work introduced LUMEN, a memory-retrieval hybrid that partially pre-computes memory and updates memory representations on the fly with a smaller live encoder. We propose GLIMMER, which improves on this approach th&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.10231v1-abstract-full').style.display = 'inline'; document.getElementById('2306.10231v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.10231v1-abstract-full" style="display: none;"> Memory-augmentation is a powerful approach for efficiently incorporating external information into language models, but leads to reduced performance relative to retrieving text. Recent work introduced LUMEN, a memory-retrieval hybrid that partially pre-computes memory and updates memory representations on the fly with a smaller live encoder. We propose GLIMMER, which improves on this approach through 1) exploiting free access to the powerful memory representations by applying a shallow reranker on top of memory to drastically improve retrieval quality at low cost, and 2) incorporating multi-task training to learn a general and higher quality memory and live encoder. GLIMMER achieves strong gains in performance at faster speeds compared to LUMEN and FiD on the KILT benchmark of knowledge-intensive tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.10231v1-abstract-full').style.display = 'none'; document.getElementById('2306.10231v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.00186">arXiv:2304.00186</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2304.00186">pdf</a>, <a href="https://arxiv.org/format/2304.00186">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Subject-driven Text-to-Image Generation via Apprenticeship Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+W">Wenhu Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+H">Hexiang Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yandong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Ruiz%2C+N">Nataniel Ruiz</a>, <a href="/search/cs?searchtype=author&amp;query=Jia%2C+X">Xuhui Jia</a>, <a href="/search/cs?searchtype=author&amp;query=Chang%2C+M">Ming-Wei Chang</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.00186v5-abstract-short" style="display: inline;"> Recent text-to-image generation models like DreamBooth have made remarkable progress in generating highly customized images of a target subject, by fine-tuning an ``expert model&#39;&#39; for a given subject from a few examples. However, this process is expensive, since a new expert model must be learned for each subject. In this paper, we present SuTI, a Subject-driven Text-to-Image generator that replac&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.00186v5-abstract-full').style.display = 'inline'; document.getElementById('2304.00186v5-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.00186v5-abstract-full" style="display: none;"> Recent text-to-image generation models like DreamBooth have made remarkable progress in generating highly customized images of a target subject, by fine-tuning an ``expert model&#39;&#39; for a given subject from a few examples. However, this process is expensive, since a new expert model must be learned for each subject. In this paper, we present SuTI, a Subject-driven Text-to-Image generator that replaces subject-specific fine tuning with in-context learning. Given a few demonstrations of a new subject, SuTI can instantly generate novel renditions of the subject in different scenes, without any subject-specific optimization. SuTI is powered by apprenticeship learning, where a single apprentice model is learned from data generated by a massive number of subject-specific expert models. Specifically, we mine millions of image clusters from the Internet, each centered around a specific visual subject. We adopt these clusters to train a massive number of expert models, each specializing in a different subject. The apprentice model SuTI then learns to imitate the behavior of these fine-tuned experts. SuTI can generate high-quality and customized subject-specific images 20x faster than optimization-based SoTA methods. On the challenging DreamBench and DreamBench-v2, our human evaluation shows that SuTI significantly outperforms existing models like InstructPix2Pix, Textual Inversion, Imagic, Prompt2Prompt, Re-Imagen and DreamBooth, especially on the subject and text alignment aspects. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.00186v5-abstract-full').style.display = 'none'; document.getElementById('2304.00186v5-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at NeurIPS 2023. Model Service to be appear as Google Vertex AI - Instant Tuning (https://cloud.google.com/vertex-ai/docs/generative-ai/image/fine-tune-model). The link to demo video: https://www.youtube.com/watch?v=Q2xQ91D_dhM&amp;t=2071s&amp;ab_channel=GoogleCloud</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2301.10448">arXiv:2301.10448</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2301.10448">pdf</a>, <a href="https://arxiv.org/format/2301.10448">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Pre-computed memory or on-the-fly encoding? A hybrid approach to retrieval augmentation makes the most of your compute </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=de+Jong%2C+M">Michiel de Jong</a>, <a href="/search/cs?searchtype=author&amp;query=Zemlyanskiy%2C+Y">Yury Zemlyanskiy</a>, <a href="/search/cs?searchtype=author&amp;query=FitzGerald%2C+N">Nicholas FitzGerald</a>, <a href="/search/cs?searchtype=author&amp;query=Ainslie%2C+J">Joshua Ainslie</a>, <a href="/search/cs?searchtype=author&amp;query=Sanghai%2C+S">Sumit Sanghai</a>, <a href="/search/cs?searchtype=author&amp;query=Sha%2C+F">Fei Sha</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W">William Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2301.10448v2-abstract-short" style="display: inline;"> Retrieval-augmented language models such as Fusion-in-Decoder are powerful, setting the state of the art on a variety of knowledge-intensive tasks. However, they are also expensive, due to the need to encode a large number of retrieved passages. Some work avoids this cost by pre-encoding a text corpus into a memory and retrieving dense representations directly. However, pre-encoding memory incurs&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.10448v2-abstract-full').style.display = 'inline'; document.getElementById('2301.10448v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2301.10448v2-abstract-full" style="display: none;"> Retrieval-augmented language models such as Fusion-in-Decoder are powerful, setting the state of the art on a variety of knowledge-intensive tasks. However, they are also expensive, due to the need to encode a large number of retrieved passages. Some work avoids this cost by pre-encoding a text corpus into a memory and retrieving dense representations directly. However, pre-encoding memory incurs a severe quality penalty as the memory representations are not conditioned on the current input. We propose LUMEN, a hybrid between these two extremes, pre-computing the majority of the retrieval representation and completing the encoding on the fly using a live encoder that is conditioned on the question and fine-tuned for the task. We show that LUMEN significantly outperforms pure memory on multiple question-answering tasks while being much cheaper than FiD, and outperforms both for any given compute budget. Moreover, the advantage of LUMEN over FiD increases with model size. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.10448v2-abstract-full').style.display = 'none'; document.getElementById('2301.10448v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICML 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.10726">arXiv:2212.10726</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2212.10726">pdf</a>, <a href="https://arxiv.org/format/2212.10726">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Beyond Contrastive Learning: A Variational Generative Model for Multilingual Retrieval </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wieting%2C+J">John Wieting</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+J+H">Jonathan H. Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Neubig%2C+G">Graham Neubig</a>, <a href="/search/cs?searchtype=author&amp;query=Berg-Kirkpatrick%2C+T">Taylor Berg-Kirkpatrick</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.10726v2-abstract-short" style="display: inline;"> Contrastive learning has been successfully used for retrieval of semantically aligned sentences, but it often requires large batch sizes or careful engineering to work well. In this paper, we instead propose a generative model for learning multilingual text embeddings which can be used to retrieve or score sentence pairs. Our model operates on parallel data in $N$ languages and, through an approxi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.10726v2-abstract-full').style.display = 'inline'; document.getElementById('2212.10726v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.10726v2-abstract-full" style="display: none;"> Contrastive learning has been successfully used for retrieval of semantically aligned sentences, but it often requires large batch sizes or careful engineering to work well. In this paper, we instead propose a generative model for learning multilingual text embeddings which can be used to retrieve or score sentence pairs. Our model operates on parallel data in $N$ languages and, through an approximation we introduce, efficiently encourages source separation in this multilingual setting, separating semantic information that is shared between translations from stylistic or language-specific variation. We show careful large-scale comparisons between contrastive and generation-based approaches for learning multilingual text embeddings, a comparison that has not been done to the best of our knowledge despite the popularity of these approaches. We evaluate this method on a suite of tasks including semantic similarity, bitext mining, and cross-lingual question retrieval -- the last of which we introduce in this paper. Overall, our Variational Multilingual Source-Separation Transformer (VMSST) model outperforms both a strong contrastive and generative baseline on these tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.10726v2-abstract-full').style.display = 'none'; document.getElementById('2212.10726v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published as a long paper at ACL 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.08153">arXiv:2212.08153</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2212.08153">pdf</a>, <a href="https://arxiv.org/format/2212.08153">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> FiDO: Fusion-in-Decoder optimized for stronger performance and faster inference </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=de+Jong%2C+M">Michiel de Jong</a>, <a href="/search/cs?searchtype=author&amp;query=Zemlyanskiy%2C+Y">Yury Zemlyanskiy</a>, <a href="/search/cs?searchtype=author&amp;query=Ainslie%2C+J">Joshua Ainslie</a>, <a href="/search/cs?searchtype=author&amp;query=FitzGerald%2C+N">Nicholas FitzGerald</a>, <a href="/search/cs?searchtype=author&amp;query=Sanghai%2C+S">Sumit Sanghai</a>, <a href="/search/cs?searchtype=author&amp;query=Sha%2C+F">Fei Sha</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W">William Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.08153v2-abstract-short" style="display: inline;"> Fusion-in-Decoder (FiD) is a powerful retrieval-augmented language model that sets the state-of-the-art on many knowledge-intensive NLP tasks. However, the architecture used for FiD was chosen by making minimal modifications to a standard T5 model, which our analysis shows to be highly suboptimal for a retrieval-augmented model. In particular, FiD allocates the bulk of FLOPs to the encoder, while&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.08153v2-abstract-full').style.display = 'inline'; document.getElementById('2212.08153v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.08153v2-abstract-full" style="display: none;"> Fusion-in-Decoder (FiD) is a powerful retrieval-augmented language model that sets the state-of-the-art on many knowledge-intensive NLP tasks. However, the architecture used for FiD was chosen by making minimal modifications to a standard T5 model, which our analysis shows to be highly suboptimal for a retrieval-augmented model. In particular, FiD allocates the bulk of FLOPs to the encoder, while the majority of inference time results from memory bandwidth constraints in the decoder. We propose two simple changes to the FiD architecture to alleviate memory bandwidth constraints, and speed up inference by 7x. This allows us to use a much larger decoder at modest cost. We denote FiD with the above modifications as FiDO, and show that it strongly improves performance over existing FiD models for a wide range of inference budgets. For example, FiDO-Large-XXL performs faster inference than FiD-Base and achieves better performance than FiD-Large. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.08153v2-abstract-full').style.display = 'none'; document.getElementById('2212.08153v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ACL Findings 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.08037">arXiv:2212.08037</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2212.08037">pdf</a>, <a href="https://arxiv.org/format/2212.08037">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Attributed Question Answering: Evaluation and Modeling for Attributed Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Bohnet%2C+B">Bernd Bohnet</a>, <a href="/search/cs?searchtype=author&amp;query=Tran%2C+V+Q">Vinh Q. Tran</a>, <a href="/search/cs?searchtype=author&amp;query=Verga%2C+P">Pat Verga</a>, <a href="/search/cs?searchtype=author&amp;query=Aharoni%2C+R">Roee Aharoni</a>, <a href="/search/cs?searchtype=author&amp;query=Andor%2C+D">Daniel Andor</a>, <a href="/search/cs?searchtype=author&amp;query=Soares%2C+L+B">Livio Baldini Soares</a>, <a href="/search/cs?searchtype=author&amp;query=Ciaramita%2C+M">Massimiliano Ciaramita</a>, <a href="/search/cs?searchtype=author&amp;query=Eisenstein%2C+J">Jacob Eisenstein</a>, <a href="/search/cs?searchtype=author&amp;query=Ganchev%2C+K">Kuzman Ganchev</a>, <a href="/search/cs?searchtype=author&amp;query=Herzig%2C+J">Jonathan Herzig</a>, <a href="/search/cs?searchtype=author&amp;query=Hui%2C+K">Kai Hui</a>, <a href="/search/cs?searchtype=author&amp;query=Kwiatkowski%2C+T">Tom Kwiatkowski</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+J">Ji Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Ni%2C+J">Jianmo Ni</a>, <a href="/search/cs?searchtype=author&amp;query=Saralegui%2C+L+S">Lierni Sestorain Saralegui</a>, <a href="/search/cs?searchtype=author&amp;query=Schuster%2C+T">Tal Schuster</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Collins%2C+M">Michael Collins</a>, <a href="/search/cs?searchtype=author&amp;query=Das%2C+D">Dipanjan Das</a>, <a href="/search/cs?searchtype=author&amp;query=Metzler%2C+D">Donald Metzler</a>, <a href="/search/cs?searchtype=author&amp;query=Petrov%2C+S">Slav Petrov</a>, <a href="/search/cs?searchtype=author&amp;query=Webster%2C+K">Kellie Webster</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.08037v2-abstract-short" style="display: inline;"> Large language models (LLMs) have shown impressive results while requiring little or no direct supervision. Further, there is mounting evidence that LLMs may have potential in information-seeking scenarios. We believe the ability of an LLM to attribute the text that it generates is likely to be crucial in this setting. We formulate and study Attributed QA as a key first step in the development of&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.08037v2-abstract-full').style.display = 'inline'; document.getElementById('2212.08037v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.08037v2-abstract-full" style="display: none;"> Large language models (LLMs) have shown impressive results while requiring little or no direct supervision. Further, there is mounting evidence that LLMs may have potential in information-seeking scenarios. We believe the ability of an LLM to attribute the text that it generates is likely to be crucial in this setting. We formulate and study Attributed QA as a key first step in the development of attributed LLMs. We propose a reproducible evaluation framework for the task and benchmark a broad set of architectures. We take human annotations as a gold standard and show that a correlated automatic metric is suitable for development. Our experimental work gives concrete answers to two key questions (How to measure attribution?, and How well do current state-of-the-art methods perform on attribution?), and give some hints as to how to address a third (How to build LLMs with attribution?). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.08037v2-abstract-full').style.display = 'none'; document.getElementById('2212.08037v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.12588">arXiv:2211.12588</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2211.12588">pdf</a>, <a href="https://arxiv.org/format/2211.12588">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Program of Thoughts Prompting: Disentangling Computation from Reasoning for Numerical Reasoning Tasks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+W">Wenhu Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+X">Xueguang Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+X">Xinyi Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.12588v4-abstract-short" style="display: inline;"> Recently, there has been significant progress in teaching language models to perform step-by-step reasoning to solve complex numerical reasoning tasks. Chain-of-thoughts prompting (CoT) is by far the state-of-art method for these tasks. CoT uses language models to perform both reasoning and computation in the multi-step `thought&#39; process. To disentangle computation from reasoning, we propose `Prog&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.12588v4-abstract-full').style.display = 'inline'; document.getElementById('2211.12588v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.12588v4-abstract-full" style="display: none;"> Recently, there has been significant progress in teaching language models to perform step-by-step reasoning to solve complex numerical reasoning tasks. Chain-of-thoughts prompting (CoT) is by far the state-of-art method for these tasks. CoT uses language models to perform both reasoning and computation in the multi-step `thought&#39; process. To disentangle computation from reasoning, we propose `Program of Thoughts&#39; (PoT), which uses language models (mainly Codex) to express the reasoning process as a program. The computation is relegated to an external computer, which executes the generated programs to derive the answer. We evaluate PoT on five math word problem datasets (GSM, AQuA, SVAMP, TabMWP, MultiArith) and three financial-QA datasets (FinQA, ConvFinQA, TATQA) for both few-shot and zero-shot setups. Under both few-shot and zero-shot settings, PoT can show an average performance gain over CoT by around 12\% across all the evaluated datasets. By combining PoT with self-consistency decoding, we can achieve SoTA performance on all math problem datasets and near-SoTA performance on financial datasets. All of our data and code are released in Github https://github.com/wenhuchen/Program-of-Thoughts <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.12588v4-abstract-full').style.display = 'none'; document.getElementById('2211.12588v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published at TMLR 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.12378">arXiv:2210.12378</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2210.12378">pdf</a>, <a href="https://arxiv.org/format/2210.12378">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Correcting Diverse Factual Errors in Abstractive Summarization via Post-Editing and Language Model Infilling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Balachandran%2C+V">Vidhisha Balachandran</a>, <a href="/search/cs?searchtype=author&amp;query=Hajishirzi%2C+H">Hannaneh Hajishirzi</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Tsvetkov%2C+Y">Yulia Tsvetkov</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.12378v2-abstract-short" style="display: inline;"> Abstractive summarization models often generate inconsistent summaries containing factual errors or hallucinated content. Recent works focus on correcting factual errors in generated summaries via post-editing. Such correction models are trained using adversarial non-factual summaries constructed using heuristic rules for injecting errors. However, generating non-factual summaries using heuristics&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.12378v2-abstract-full').style.display = 'inline'; document.getElementById('2210.12378v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.12378v2-abstract-full" style="display: none;"> Abstractive summarization models often generate inconsistent summaries containing factual errors or hallucinated content. Recent works focus on correcting factual errors in generated summaries via post-editing. Such correction models are trained using adversarial non-factual summaries constructed using heuristic rules for injecting errors. However, generating non-factual summaries using heuristics often does not generalize well to actual model errors. In this work, we propose to generate hard, representative synthetic examples of non-factual summaries through infilling language models. With this data, we train a more robust fact-correction model to post-edit the summaries to improve factual consistency. Through quantitative and qualitative experiments on two popular summarization datasets -- CNN/DM and XSum -- we show that our approach vastly outperforms prior methods in correcting erroneous summaries. Our model -- FactEdit -- improves factuality scores by over ~11 points on CNN/DM and over ~31 points on XSum on average across multiple summarization models, producing more factual summaries while maintaining competitive summarization quality. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.12378v2-abstract-full').style.display = 'none'; document.getElementById('2210.12378v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.02928">arXiv:2210.02928</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2210.02928">pdf</a>, <a href="https://arxiv.org/format/2210.02928">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MuRAG: Multimodal Retrieval-Augmented Generator for Open Question Answering over Images and Text </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+W">Wenhu Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+H">Hexiang Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+X">Xi Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Verga%2C+P">Pat Verga</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.02928v2-abstract-short" style="display: inline;"> While language Models store a massive amount of world knowledge implicitly in their parameters, even very large models often fail to encode information about rare entities and events, while incurring huge computational costs. Recently, retrieval-augmented models, such as REALM, RAG, and RETRO, have incorporated world knowledge into language generation by leveraging an external non-parametric index&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.02928v2-abstract-full').style.display = 'inline'; document.getElementById('2210.02928v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.02928v2-abstract-full" style="display: none;"> While language Models store a massive amount of world knowledge implicitly in their parameters, even very large models often fail to encode information about rare entities and events, while incurring huge computational costs. Recently, retrieval-augmented models, such as REALM, RAG, and RETRO, have incorporated world knowledge into language generation by leveraging an external non-parametric index and have demonstrated impressive performance with constrained model sizes. However, these methods are restricted to retrieving only textual knowledge, neglecting the ubiquitous amount of knowledge in other modalities like images -- much of which contains information not covered by any text. To address this limitation, we propose the first Multimodal Retrieval-Augmented Transformer (MuRAG), which accesses an external non-parametric multimodal memory to augment language generation. MuRAG is pre-trained with a mixture of large-scale image-text and text-only corpora using a joint contrastive and generative loss. We perform experiments on two different datasets that require retrieving and reasoning over both images and text to answer a given query: WebQA, and MultimodalQA. Our results show that MuRAG achieves state-of-the-art accuracy, outperforming existing models by 10-20\% absolute on both datasets and under both distractor and full-wiki settings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.02928v2-abstract-full').style.display = 'none'; document.getElementById('2210.02928v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to EMNLP 2022 main conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.14491">arXiv:2209.14491</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2209.14491">pdf</a>, <a href="https://arxiv.org/format/2209.14491">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Re-Imagen: Retrieval-Augmented Text-to-Image Generator </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+W">Wenhu Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+H">Hexiang Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Saharia%2C+C">Chitwan Saharia</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.14491v3-abstract-short" style="display: inline;"> Research on text-to-image generation has witnessed significant progress in generating diverse and photo-realistic images, driven by diffusion and auto-regressive models trained on large-scale image-text data. Though state-of-the-art models can generate high-quality images of common entities, they often have difficulty generating images of uncommon entities, such as `Chortai (dog)&#39; or `Picarones (f&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.14491v3-abstract-full').style.display = 'inline'; document.getElementById('2209.14491v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.14491v3-abstract-full" style="display: none;"> Research on text-to-image generation has witnessed significant progress in generating diverse and photo-realistic images, driven by diffusion and auto-regressive models trained on large-scale image-text data. Though state-of-the-art models can generate high-quality images of common entities, they often have difficulty generating images of uncommon entities, such as `Chortai (dog)&#39; or `Picarones (food)&#39;. To tackle this issue, we present the Retrieval-Augmented Text-to-Image Generator (Re-Imagen), a generative model that uses retrieved information to produce high-fidelity and faithful images, even for rare or unseen entities. Given a text prompt, Re-Imagen accesses an external multi-modal knowledge base to retrieve relevant (image, text) pairs and uses them as references to generate the image. With this retrieval step, Re-Imagen is augmented with the knowledge of high-level semantics and low-level visual details of the mentioned entities, and thus improves its accuracy in generating the entities&#39; visual appearances. We train Re-Imagen on a constructed dataset containing (image, text, retrieval) triples to teach the model to ground on both text prompt and retrieval. Furthermore, we develop a new sampling strategy to interleave the classifier-free guidance for text and retrieval conditions to balance the text and retrieval alignment. Re-Imagen achieves significant gain on FID score over COCO and WikiImage. To further evaluate the capabilities of the model, we introduce EntityDrawBench, a new benchmark that evaluates image generation for diverse entities, from frequent to rare, across multiple object categories including dogs, foods, landmarks, birds, and characters. Human evaluation on EntityDrawBench shows that Re-Imagen can significantly improve the fidelity of generated images, especially on less frequent entities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.14491v3-abstract-full').style.display = 'none'; document.getElementById('2209.14491v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.12153">arXiv:2209.12153</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2209.12153">pdf</a>, <a href="https://arxiv.org/format/2209.12153">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> WinoDict: Probing language models for in-context word acquisition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Eisenschlos%2C+J+M">Julian Martin Eisenschlos</a>, <a href="/search/cs?searchtype=author&amp;query=Cole%2C+J+R">Jeremy R. Cole</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+F">Fangyu Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.12153v1-abstract-short" style="display: inline;"> We introduce a new in-context learning paradigm to measure Large Language Models&#39; (LLMs) ability to learn novel words during inference. In particular, we rewrite Winograd-style co-reference resolution problems by replacing the key concept word with a synthetic but plausible word that the model must understand to complete the task. Solving this task requires the model to make use of the dictionary&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.12153v1-abstract-full').style.display = 'inline'; document.getElementById('2209.12153v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.12153v1-abstract-full" style="display: none;"> We introduce a new in-context learning paradigm to measure Large Language Models&#39; (LLMs) ability to learn novel words during inference. In particular, we rewrite Winograd-style co-reference resolution problems by replacing the key concept word with a synthetic but plausible word that the model must understand to complete the task. Solving this task requires the model to make use of the dictionary definition of the new word given in the prompt. This benchmark addresses word acquisition, one important aspect of the diachronic degradation known to afflict LLMs. As LLMs are frozen in time at the moment they are trained, they are normally unable to reflect the way language changes over time. We show that the accuracy of LLMs compared to the original Winograd tasks decreases radically in our benchmark, thus identifying a limitation of current models and providing a benchmark to measure future improvements in LLMs ability to do in-context learning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.12153v1-abstract-full').style.display = 'none'; document.getElementById('2209.12153v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.00630">arXiv:2207.00630</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2207.00630">pdf</a>, <a href="https://arxiv.org/format/2207.00630">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> QA Is the New KR: Question-Answer Pairs as Knowledge Bases </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+W">Wenhu Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=De+Jong%2C+M">Michiel De Jong</a>, <a href="/search/cs?searchtype=author&amp;query=Gupta%2C+N">Nitish Gupta</a>, <a href="/search/cs?searchtype=author&amp;query=Presta%2C+A">Alessandro Presta</a>, <a href="/search/cs?searchtype=author&amp;query=Verga%2C+P">Pat Verga</a>, <a href="/search/cs?searchtype=author&amp;query=Wieting%2C+J">John Wieting</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.00630v1-abstract-short" style="display: inline;"> In this position paper, we propose a new approach to generating a type of knowledge base (KB) from text, based on question generation and entity linking. We argue that the proposed type of KB has many of the key advantages of a traditional symbolic KB: in particular, it consists of small modular components, which can be combined compositionally to answer complex queries, including relational queri&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.00630v1-abstract-full').style.display = 'inline'; document.getElementById('2207.00630v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.00630v1-abstract-full" style="display: none;"> In this position paper, we propose a new approach to generating a type of knowledge base (KB) from text, based on question generation and entity linking. We argue that the proposed type of KB has many of the key advantages of a traditional symbolic KB: in particular, it consists of small modular components, which can be combined compositionally to answer complex queries, including relational queries and queries involving &#34;multi-hop&#34; inferences. However, unlike a traditional KB, this information store is well-aligned with common user information needs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.00630v1-abstract-full').style.display = 'none'; document.getElementById('2207.00630v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.12898">arXiv:2205.12898</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2205.12898">pdf</a>, <a href="https://arxiv.org/format/2205.12898">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Reasoning over Logically Interacted Conditions for Question Answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Sun%2C+H">Haitian Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Salakhutdinov%2C+R">Ruslan Salakhutdinov</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.12898v1-abstract-short" style="display: inline;"> Some questions have multiple answers that are not equally correct, i.e. answers are different under different conditions. Conditions are used to distinguish answers as well as to provide additional information to support them. In this paper, we study a more challenging task where answers are constrained by a list of conditions that logically interact, which requires performing logical reasoning ov&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.12898v1-abstract-full').style.display = 'inline'; document.getElementById('2205.12898v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.12898v1-abstract-full" style="display: none;"> Some questions have multiple answers that are not equally correct, i.e. answers are different under different conditions. Conditions are used to distinguish answers as well as to provide additional information to support them. In this paper, we study a more challenging task where answers are constrained by a list of conditions that logically interact, which requires performing logical reasoning over the conditions to determine the correctness of the answers. Even more challenging, we only provide evidences for a subset of the conditions, so some questions may not have deterministic answers. In such cases, models are asked to find probable answers and identify conditions that need to be satisfied to make the answers correct. We propose a new model, TReasoner, for this challenging reasoning task. TReasoner consists of an entailment module, a reasoning module, and a generation module (if the answers are free-form text spans). TReasoner achieves state-of-the-art performance on two benchmark conditional QA datasets, outperforming the previous state-of-the-art by 3-10 points. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.12898v1-abstract-full').style.display = 'none'; document.getElementById('2205.12898v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.04581">arXiv:2204.04581</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2204.04581">pdf</a>, <a href="https://arxiv.org/format/2204.04581">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Augmenting Pre-trained Language Models with QA-Memory for Open-Domain Question Answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+W">Wenhu Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Verga%2C+P">Pat Verga</a>, <a href="/search/cs?searchtype=author&amp;query=de+Jong%2C+M">Michiel de Jong</a>, <a href="/search/cs?searchtype=author&amp;query=Wieting%2C+J">John Wieting</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W">William Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.04581v3-abstract-short" style="display: inline;"> Retrieval augmented language models have recently become the standard for knowledge intensive tasks. Rather than relying purely on latent semantics within the parameters of large neural models, these methods enlist a semi-parametric memory to encode an index of knowledge for the model to retrieve over. Most prior work has employed text passages as the unit of knowledge, which has high coverage at&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.04581v3-abstract-full').style.display = 'inline'; document.getElementById('2204.04581v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.04581v3-abstract-full" style="display: none;"> Retrieval augmented language models have recently become the standard for knowledge intensive tasks. Rather than relying purely on latent semantics within the parameters of large neural models, these methods enlist a semi-parametric memory to encode an index of knowledge for the model to retrieve over. Most prior work has employed text passages as the unit of knowledge, which has high coverage at the cost of interpretability, controllability, and efficiency. The opposite properties arise in other methods which have instead relied on knowledge base (KB) facts. At the same time, more recent work has demonstrated the effectiveness of storing and retrieving from an index of Q-A pairs derived from text \citep{lewis2021paq}. This approach yields a high coverage knowledge representation that maintains KB-like properties due to its representations being more atomic units of information. In this work we push this line of research further by proposing a question-answer augmented encoder-decoder model and accompanying pretraining strategy. This yields an end-to-end system that not only outperforms prior QA retrieval methods on single-hop QA tasks but also enables compositional reasoning, as demonstrated by strong performance on two multi-hop QA datasets. Together, these methods improve the ability to interpret and control the model while narrowing the performance gap with passage retrieval systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.04581v3-abstract-full').style.display = 'none'; document.getElementById('2204.04581v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by EACL 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.06991">arXiv:2202.06991</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2202.06991">pdf</a>, <a href="https://arxiv.org/format/2202.06991">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Transformer Memory as a Differentiable Search Index </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Tay%2C+Y">Yi Tay</a>, <a href="/search/cs?searchtype=author&amp;query=Tran%2C+V+Q">Vinh Q. Tran</a>, <a href="/search/cs?searchtype=author&amp;query=Dehghani%2C+M">Mostafa Dehghani</a>, <a href="/search/cs?searchtype=author&amp;query=Ni%2C+J">Jianmo Ni</a>, <a href="/search/cs?searchtype=author&amp;query=Bahri%2C+D">Dara Bahri</a>, <a href="/search/cs?searchtype=author&amp;query=Mehta%2C+H">Harsh Mehta</a>, <a href="/search/cs?searchtype=author&amp;query=Qin%2C+Z">Zhen Qin</a>, <a href="/search/cs?searchtype=author&amp;query=Hui%2C+K">Kai Hui</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+Z">Zhe Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Gupta%2C+J">Jai Gupta</a>, <a href="/search/cs?searchtype=author&amp;query=Schuster%2C+T">Tal Schuster</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Metzler%2C+D">Donald Metzler</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.06991v3-abstract-short" style="display: inline;"> In this paper, we demonstrate that information retrieval can be accomplished with a single Transformer, in which all information about the corpus is encoded in the parameters of the model. To this end, we introduce the Differentiable Search Index (DSI), a new paradigm that learns a text-to-text model that maps string queries directly to relevant docids; in other words, a DSI model answers queries&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.06991v3-abstract-full').style.display = 'inline'; document.getElementById('2202.06991v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.06991v3-abstract-full" style="display: none;"> In this paper, we demonstrate that information retrieval can be accomplished with a single Transformer, in which all information about the corpus is encoded in the parameters of the model. To this end, we introduce the Differentiable Search Index (DSI), a new paradigm that learns a text-to-text model that maps string queries directly to relevant docids; in other words, a DSI model answers queries directly using only its parameters, dramatically simplifying the whole retrieval process. We study variations in how documents and their identifiers are represented, variations in training procedures, and the interplay between models and corpus sizes. Experiments demonstrate that given appropriate design choices, DSI significantly outperforms strong baselines such as dual encoder models. Moreover, DSI demonstrates strong generalization capabilities, outperforming a BM25 baseline in a zero-shot setup. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.06991v3-abstract-full').style.display = 'none'; document.getElementById('2202.06991v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.09669">arXiv:2112.09669</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2112.09669">pdf</a>, <a href="https://arxiv.org/format/2112.09669">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Explain, Edit, and Understand: Rethinking User Study Design for Evaluating Model Explanations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Arora%2C+S">Siddhant Arora</a>, <a href="/search/cs?searchtype=author&amp;query=Pruthi%2C+D">Danish Pruthi</a>, <a href="/search/cs?searchtype=author&amp;query=Sadeh%2C+N">Norman Sadeh</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Lipton%2C+Z+C">Zachary C. Lipton</a>, <a href="/search/cs?searchtype=author&amp;query=Neubig%2C+G">Graham Neubig</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.09669v2-abstract-short" style="display: inline;"> In attempts to &#34;explain&#34; predictions of machine learning models, researchers have proposed hundreds of techniques for attributing predictions to features that are deemed important. While these attributions are often claimed to hold the potential to improve human &#34;understanding&#34; of the models, surprisingly little work explicitly evaluates progress towards this aspiration. In this paper, we conduct&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.09669v2-abstract-full').style.display = 'inline'; document.getElementById('2112.09669v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.09669v2-abstract-full" style="display: none;"> In attempts to &#34;explain&#34; predictions of machine learning models, researchers have proposed hundreds of techniques for attributing predictions to features that are deemed important. While these attributions are often claimed to hold the potential to improve human &#34;understanding&#34; of the models, surprisingly little work explicitly evaluates progress towards this aspiration. In this paper, we conduct a crowdsourcing study, where participants interact with deception detection models that have been trained to distinguish between genuine and fake hotel reviews. They are challenged both to simulate the model on fresh reviews, and to edit reviews with the goal of lowering the probability of the originally predicted class. Successful manipulations would lead to an adversarial example. During the training (but not the test) phase, input spans are highlighted to communicate salience. Through our evaluation, we observe that for a linear bag-of-words model, participants with access to the feature coefficients during training are able to cause a larger reduction in model confidence in the testing phase when compared to the no-explanation control. For the BERT-based classifier, popular local explanations do not improve their ability to reduce the model confidence over the no-explanation case. Remarkably, when the explanation for the BERT model is given by the (global) attributions of a linear model trained to imitate the BERT model, people can effectively manipulate the model. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.09669v2-abstract-full').style.display = 'none'; document.getElementById('2112.09669v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">AAAI 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.06884">arXiv:2110.06884</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2110.06884">pdf</a>, <a href="https://arxiv.org/format/2110.06884">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> ConditionalQA: A Complex Reading Comprehension Dataset with Conditional Answers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Sun%2C+H">Haitian Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Salakhutdinov%2C+R">Ruslan Salakhutdinov</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.06884v1-abstract-short" style="display: inline;"> We describe a Question Answering (QA) dataset that contains complex questions with conditional answers, i.e. the answers are only applicable when certain conditions apply. We call this dataset ConditionalQA. In addition to conditional answers, the dataset also features: (1) long context documents with information that is related in logically complex ways; (2) multi-hop questions that require compo&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.06884v1-abstract-full').style.display = 'inline'; document.getElementById('2110.06884v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.06884v1-abstract-full" style="display: none;"> We describe a Question Answering (QA) dataset that contains complex questions with conditional answers, i.e. the answers are only applicable when certain conditions apply. We call this dataset ConditionalQA. In addition to conditional answers, the dataset also features: (1) long context documents with information that is related in logically complex ways; (2) multi-hop questions that require compositional logical reasoning; (3) a combination of extractive questions, yes/no questions, questions with multiple answers, and not-answerable questions; (4) questions asked without knowing the answers. We show that ConditionalQA is challenging for many of the existing QA models, especially in selecting answer conditions. We believe that this dataset will motivate further research in answering complex questions over long documents. Data and leaderboard are publicly available at \url{https://github.com/haitian-sun/ConditionalQA}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.06884v1-abstract-full').style.display = 'none'; document.getElementById('2110.06884v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.06176">arXiv:2110.06176</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2110.06176">pdf</a>, <a href="https://arxiv.org/format/2110.06176">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Mention Memory: incorporating textual knowledge into Transformers through entity mention attention </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=de+Jong%2C+M">Michiel de Jong</a>, <a href="/search/cs?searchtype=author&amp;query=Zemlyanskiy%2C+Y">Yury Zemlyanskiy</a>, <a href="/search/cs?searchtype=author&amp;query=FitzGerald%2C+N">Nicholas FitzGerald</a>, <a href="/search/cs?searchtype=author&amp;query=Sha%2C+F">Fei Sha</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W">William Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.06176v2-abstract-short" style="display: inline;"> Natural language understanding tasks such as open-domain question answering often require retrieving and assimilating factual information from multiple sources. We propose to address this problem by integrating a semi-parametric representation of a large text corpus into a Transformer model as a source of factual knowledge. Specifically, our method represents knowledge with `mention memory&#39;, a tab&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.06176v2-abstract-full').style.display = 'inline'; document.getElementById('2110.06176v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.06176v2-abstract-full" style="display: none;"> Natural language understanding tasks such as open-domain question answering often require retrieving and assimilating factual information from multiple sources. We propose to address this problem by integrating a semi-parametric representation of a large text corpus into a Transformer model as a source of factual knowledge. Specifically, our method represents knowledge with `mention memory&#39;, a table of dense vector representations of every entity mention in a corpus. The proposed model - TOME - is a Transformer that accesses the information through internal memory layers in which each entity mention in the input passage attends to the mention memory. This approach enables synthesis of and reasoning over many disparate sources of information within a single Transformer model. In experiments using a memory of 150 million Wikipedia mentions, TOME achieves strong performance on several open-domain knowledge-intensive tasks, including the claim verification benchmarks HoVer and FEVER and several entity-based QA benchmarks. We also show that the model learns to attend to informative mentions without any direct supervision. Finally we demonstrate that the model can generalize to new unseen entities by updating the memory without retraining. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.06176v2-abstract-full').style.display = 'none'; document.getElementById('2110.06176v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.14364">arXiv:2109.14364</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2109.14364">pdf</a>, <a href="https://arxiv.org/format/2109.14364">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Multilingual Fact Linking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kolluru%2C+K">Keshav Kolluru</a>, <a href="/search/cs?searchtype=author&amp;query=Rezk%2C+M">Martin Rezk</a>, <a href="/search/cs?searchtype=author&amp;query=Verga%2C+P">Pat Verga</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.14364v2-abstract-short" style="display: inline;"> Knowledge-intensive NLP tasks can benefit from linking natural language text with facts from a Knowledge Graph (KG). Although facts themselves are language-agnostic, the fact labels (i.e., language-specific representation of the fact) in the KG are often present only in a few languages. This makes it challenging to link KG facts to sentences in languages other than the limited set of languages. To&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.14364v2-abstract-full').style.display = 'inline'; document.getElementById('2109.14364v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.14364v2-abstract-full" style="display: none;"> Knowledge-intensive NLP tasks can benefit from linking natural language text with facts from a Knowledge Graph (KG). Although facts themselves are language-agnostic, the fact labels (i.e., language-specific representation of the fact) in the KG are often present only in a few languages. This makes it challenging to link KG facts to sentences in languages other than the limited set of languages. To address this problem, we introduce the task of Multilingual Fact Linking (MFL) where the goal is to link fact expressed in a sentence to corresponding fact in the KG, even when the fact label in the KG is not available in the language of the sentence. To facilitate research in this area, we present a new evaluation dataset, IndicLink. This dataset contains 11,293 linked WikiData facts and 6,429 sentences spanning English and six Indian languages. We propose a Retrieval+Generation model, ReFCoG, that can scale to millions of KG facts by combining Dual Encoder based retrieval with a Seq2Seq based generation model which is constrained to output only valid KG facts. ReFCoG outperforms standard Retrieval+Re-ranking models by 10.7 pts in Precision@1. In spite of this gain, the model achieves an overall score of 52.1, showing ample scope for improvement in the task.ReFCoG code and IndicLink data are available at https://github.com/SaiKeshav/mfl <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.14364v2-abstract-full').style.display = 'none'; document.getElementById('2109.14364v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">AKBC 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.04312">arXiv:2109.04312</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2109.04312">pdf</a>, <a href="https://arxiv.org/format/2109.04312">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> MATE: Multi-view Attention for Table Transformer Efficiency </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Eisenschlos%2C+J+M">Julian Martin Eisenschlos</a>, <a href="/search/cs?searchtype=author&amp;query=Gor%2C+M">Maharshi Gor</a>, <a href="/search/cs?searchtype=author&amp;query=M%C3%BCller%2C+T">Thomas M眉ller</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.04312v1-abstract-short" style="display: inline;"> This work presents a sparse-attention Transformer architecture for modeling documents that contain large tables. Tables are ubiquitous on the web, and are rich in information. However, more than 20% of relational tables on the web have 20 or more rows (Cafarella et al., 2008), and these large tables present a challenge for current Transformer models, which are typically limited to 512 tokens. Here&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.04312v1-abstract-full').style.display = 'inline'; document.getElementById('2109.04312v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.04312v1-abstract-full" style="display: none;"> This work presents a sparse-attention Transformer architecture for modeling documents that contain large tables. Tables are ubiquitous on the web, and are rich in information. However, more than 20% of relational tables on the web have 20 or more rows (Cafarella et al., 2008), and these large tables present a challenge for current Transformer models, which are typically limited to 512 tokens. Here we propose MATE, a novel Transformer architecture designed to model the structure of web tables. MATE uses sparse attention in a way that allows heads to efficiently attend to either rows or columns in a table. This architecture scales linearly with respect to speed and memory, and can handle documents containing more than 8000 tokens with current accelerators. MATE also has a more appropriate inductive bias for tabular data, and sets a new state-of-the-art for three table reasoning datasets. For HybridQA (Chen et al., 2020b), a dataset that involves large documents containing tables, we improve the best prior result by 19 points. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.04312v1-abstract-full').style.display = 'none'; document.getElementById('2109.04312v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to EMNLP 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.15110">arXiv:2106.15110</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2106.15110">pdf</a>, <a href="https://arxiv.org/format/2106.15110">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1162/tacl_a_00459">10.1162/tacl_a_00459 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Time-Aware Language Models as Temporal Knowledge Bases </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Cole%2C+J+R">Jeremy R. Cole</a>, <a href="/search/cs?searchtype=author&amp;query=Eisenschlos%2C+J+M">Julian Martin Eisenschlos</a>, <a href="/search/cs?searchtype=author&amp;query=Gillick%2C+D">Daniel Gillick</a>, <a href="/search/cs?searchtype=author&amp;query=Eisenstein%2C+J">Jacob Eisenstein</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.15110v2-abstract-short" style="display: inline;"> Many facts come with an expiration date, from the name of the President to the basketball team Lebron James plays for. But language models (LMs) are trained on snapshots of data collected at a specific moment in time, and this can limit their utility, especially in the closed-book setting where the pretraining corpus must contain the facts the model should memorize. We introduce a diagnostic datas&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.15110v2-abstract-full').style.display = 'inline'; document.getElementById('2106.15110v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.15110v2-abstract-full" style="display: none;"> Many facts come with an expiration date, from the name of the President to the basketball team Lebron James plays for. But language models (LMs) are trained on snapshots of data collected at a specific moment in time, and this can limit their utility, especially in the closed-book setting where the pretraining corpus must contain the facts the model should memorize. We introduce a diagnostic dataset aimed at probing LMs for factual knowledge that changes over time and highlight problems with LMs at either end of the spectrum -- those trained on specific slices of temporal data, as well as those trained on a wide range of temporal data. To mitigate these problems, we propose a simple technique for jointly modeling text with its timestamp. This improves memorization of seen facts from the training time period, as well as calibration on predictions about unseen facts from future time periods. We also show that models trained with temporal context can be efficiently &#34;refreshed&#34; as new data arrives, without the need for retraining from scratch. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.15110v2-abstract-full').style.display = 'none'; document.getElementById('2106.15110v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Version accepted to TACL</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Transactions of the Association for Computational Linguistics 2022; 10 257-273 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.00200">arXiv:2106.00200</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2106.00200">pdf</a>, <a href="https://arxiv.org/format/2106.00200">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Iterative Hierarchical Attention for Answering Complex Questions over Long Documents </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Sun%2C+H">Haitian Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Salakhutdinov%2C+R">Ruslan Salakhutdinov</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.00200v2-abstract-short" style="display: inline;"> We propose a new model, DocHopper, that iteratively attends to different parts of long, hierarchically structured documents to answer complex questions. Similar to multi-hop question-answering (QA) systems, at each step, DocHopper uses a query $q$ to attend to information from a document, combines this ``retrieved&#39;&#39; information with $q$ to produce the next query. However, in contrast to most previ&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.00200v2-abstract-full').style.display = 'inline'; document.getElementById('2106.00200v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.00200v2-abstract-full" style="display: none;"> We propose a new model, DocHopper, that iteratively attends to different parts of long, hierarchically structured documents to answer complex questions. Similar to multi-hop question-answering (QA) systems, at each step, DocHopper uses a query $q$ to attend to information from a document, combines this ``retrieved&#39;&#39; information with $q$ to produce the next query. However, in contrast to most previous multi-hop QA systems, DocHopper is able to ``retrieve&#39;&#39; either short passages or long sections of the document, thus emulating a multi-step process of ``navigating&#39;&#39; through a long document to answer a question. To enable this novel behavior, DocHopper does not combine document information with $q$ by concatenating text to the text of $q$, but by combining a compact neural representation of $q$ with a compact neural representation of a hierarchical part of the document, which can potentially be quite large. We experiment with DocHopper on four different QA tasks that require reading long and complex documents to answer multi-hop questions, and show that DocHopper achieves state-of-the-art results on three of the datasets. Additionally, DocHopper is efficient at inference time, being 3--10 times faster than the baselines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.00200v2-abstract-full').style.display = 'none'; document.getElementById('2106.00200v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 May, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2104.01940">arXiv:2104.01940</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2104.01940">pdf</a>, <a href="https://arxiv.org/ps/2104.01940">ps</a>, <a href="https://arxiv.org/format/2104.01940">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> What&#39;s the best place for an AI conference, Vancouver or ______: Why completing comparative questions is difficult </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zagoury%2C+A">Avishai Zagoury</a>, <a href="/search/cs?searchtype=author&amp;query=Minkov%2C+E">Einat Minkov</a>, <a href="/search/cs?searchtype=author&amp;query=Szpektor%2C+I">Idan Szpektor</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2104.01940v1-abstract-short" style="display: inline;"> Although large neural language models (LMs) like BERT can be finetuned to yield state-of-the-art results on many NLP tasks, it is often unclear what these models actually learn. Here we study using such LMs to fill in entities in human-authored comparative questions, like ``Which country is older, India or ______?&#39;&#39; -- i.e., we study the ability of neural LMs to ask (not answer) reasonable questio&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.01940v1-abstract-full').style.display = 'inline'; document.getElementById('2104.01940v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2104.01940v1-abstract-full" style="display: none;"> Although large neural language models (LMs) like BERT can be finetuned to yield state-of-the-art results on many NLP tasks, it is often unclear what these models actually learn. Here we study using such LMs to fill in entities in human-authored comparative questions, like ``Which country is older, India or ______?&#39;&#39; -- i.e., we study the ability of neural LMs to ask (not answer) reasonable questions. We show that accuracy in this fill-in-the-blank task is well-correlated with human judgements of whether a question is reasonable, and that these models can be trained to achieve nearly human-level performance in completing comparative questions in three different subdomains. However, analysis shows that what they learn fails to model any sort of broad notion of which entities are semantically comparable or similar -- instead the trained models are very domain-specific, and performance is highly correlated with co-occurrences between specific entities observed in the training set. This is true both for models that are pretrained on general text corpora, as well as models trained on a large corpus of comparison questions. Our study thus reinforces recent results on the difficulty of making claims about a deep model&#39;s world knowledge or linguistic competence based on performance on specific benchmark problems. We make our evaluation datasets publicly available to foster future research on complex understanding and reasoning in such models at standards of human interaction. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.01940v1-abstract-full').style.display = 'none'; document.getElementById('2104.01940v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">AAAI 2021; preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2102.07043">arXiv:2102.07043</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2102.07043">pdf</a>, <a href="https://arxiv.org/format/2102.07043">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Reasoning Over Virtual Knowledge Bases With Open Predicate Relations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Sun%2C+H">Haitian Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Verga%2C+P">Pat Verga</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Salakhutdinov%2C+R">Ruslan Salakhutdinov</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2102.07043v2-abstract-short" style="display: inline;"> We present the Open Predicate Query Language (OPQL); a method for constructing a virtual KB (VKB) trained entirely from text. Large Knowledge Bases (KBs) are indispensable for a wide-range of industry applications such as question answering and recommendation. Typically, KBs encode world knowledge in a structured, readily accessible form derived from laborious human annotation efforts. Unfortunate&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.07043v2-abstract-full').style.display = 'inline'; document.getElementById('2102.07043v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2102.07043v2-abstract-full" style="display: none;"> We present the Open Predicate Query Language (OPQL); a method for constructing a virtual KB (VKB) trained entirely from text. Large Knowledge Bases (KBs) are indispensable for a wide-range of industry applications such as question answering and recommendation. Typically, KBs encode world knowledge in a structured, readily accessible form derived from laborious human annotation efforts. Unfortunately, while they are extremely high precision, KBs are inevitably highly incomplete and automated methods for enriching them are far too inaccurate. Instead, OPQL constructs a VKB by encoding and indexing a set of relation mentions in a way that naturally enables reasoning and can be trained without any structured supervision. We demonstrate that OPQL outperforms prior VKB methods on two different KB reasoning tasks and, additionally, can be used as an external memory integrated into a language model (OPQL-LM) leading to improvements on two open-domain question answering tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.07043v2-abstract-full').style.display = 'none'; document.getElementById('2102.07043v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 February, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at the 38th International Conference on Machine Learning, PMLR 139, 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2012.00893">arXiv:2012.00893</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2012.00893">pdf</a>, <a href="https://arxiv.org/format/2012.00893">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Evaluating Explanations: How much do explanations from the teacher aid students? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Pruthi%2C+D">Danish Pruthi</a>, <a href="/search/cs?searchtype=author&amp;query=Bansal%2C+R">Rachit Bansal</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Soares%2C+L+B">Livio Baldini Soares</a>, <a href="/search/cs?searchtype=author&amp;query=Collins%2C+M">Michael Collins</a>, <a href="/search/cs?searchtype=author&amp;query=Lipton%2C+Z+C">Zachary C. Lipton</a>, <a href="/search/cs?searchtype=author&amp;query=Neubig%2C+G">Graham Neubig</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2012.00893v2-abstract-short" style="display: inline;"> While many methods purport to explain predictions by highlighting salient features, what aims these explanations serve and how they ought to be evaluated often go unstated. In this work, we introduce a framework to quantify the value of explanations via the accuracy gains that they confer on a student model trained to simulate a teacher model. Crucially, the explanations are available to the stude&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2012.00893v2-abstract-full').style.display = 'inline'; document.getElementById('2012.00893v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2012.00893v2-abstract-full" style="display: none;"> While many methods purport to explain predictions by highlighting salient features, what aims these explanations serve and how they ought to be evaluated often go unstated. In this work, we introduce a framework to quantify the value of explanations via the accuracy gains that they confer on a student model trained to simulate a teacher model. Crucially, the explanations are available to the student during training, but are not available at test time. Compared to prior proposals, our approach is less easily gamed, enabling principled, automatic, model-agnostic evaluation of attributions. Using our framework, we compare numerous attribution methods for text classification and question answering, and observe quantitative differences that are consistent (to a moderate to high degree) across different student model architectures and learning strategies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2012.00893v2-abstract-full').style.display = 'none'; document.getElementById('2012.00893v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 December, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">TACL 2021 (pre-MIT Press publication version)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.14439">arXiv:2010.14439</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2010.14439">pdf</a>, <a href="https://arxiv.org/format/2010.14439">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Differentiable Open-Ended Commonsense Reasoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lin%2C+B+Y">Bill Yuchen Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+H">Haitian Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Zaheer%2C+M">Manzil Zaheer</a>, <a href="/search/cs?searchtype=author&amp;query=Ren%2C+X">Xiang Ren</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.14439v2-abstract-short" style="display: inline;"> Current commonsense reasoning research focuses on developing models that use commonsense knowledge to answer multiple-choice questions. However, systems designed to answer multiple-choice questions may not be useful in applications that do not provide a small list of candidate answers to choose from. As a step towards making commonsense reasoning research more realistic, we propose to study open-e&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.14439v2-abstract-full').style.display = 'inline'; document.getElementById('2010.14439v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.14439v2-abstract-full" style="display: none;"> Current commonsense reasoning research focuses on developing models that use commonsense knowledge to answer multiple-choice questions. However, systems designed to answer multiple-choice questions may not be useful in applications that do not provide a small list of candidate answers to choose from. As a step towards making commonsense reasoning research more realistic, we propose to study open-ended commonsense reasoning (OpenCSR) -- the task of answering a commonsense question without any pre-defined choices -- using as a resource only a corpus of commonsense facts written in natural language. OpenCSR is challenging due to a large decision space, and because many questions require implicit multi-hop reasoning. As an approach to OpenCSR, we propose DrFact, an efficient Differentiable model for multi-hop Reasoning over knowledge Facts. To evaluate OpenCSR methods, we adapt several popular commonsense reasoning benchmarks, and collect multiple new answers for each test question via crowd-sourcing. Experiments show that DrFact outperforms strong baseline methods by a large margin. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.14439v2-abstract-full').style.display = 'none'; document.getElementById('2010.14439v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to NAACL 2021. Project website: https://open-csr.github.io</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.10439">arXiv:2010.10439</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2010.10439">pdf</a>, <a href="https://arxiv.org/format/2010.10439">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Open Question Answering over Tables and Text </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+W">Wenhu Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Chang%2C+M">Ming-Wei Chang</a>, <a href="/search/cs?searchtype=author&amp;query=Schlinger%2C+E">Eva Schlinger</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+W">William Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.10439v2-abstract-short" style="display: inline;"> In open question answering (QA), the answer to a question is produced by retrieving and then analyzing documents that might contain answers to the question. Most open QA systems have considered only retrieving information from unstructured text. Here we consider for the first time open QA over both tabular and textual data and present a new large-scale dataset Open Table-and-Text Question Answerin&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.10439v2-abstract-full').style.display = 'inline'; document.getElementById('2010.10439v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.10439v2-abstract-full" style="display: none;"> In open question answering (QA), the answer to a question is produced by retrieving and then analyzing documents that might contain answers to the question. Most open QA systems have considered only retrieving information from unstructured text. Here we consider for the first time open QA over both tabular and textual data and present a new large-scale dataset Open Table-and-Text Question Answering (OTT-QA) to evaluate performance on this task. Most questions in OTT-QA require multi-hop inference across tabular data and unstructured text, and the evidence required to answer a question can be distributed in different ways over these two types of input, making evidence retrieval challenging -- our baseline model using an iterative retriever and BERT-based reader achieves an exact match score less than 10%. We then propose two novel techniques to address the challenge of retrieving and aggregating evidence for OTT-QA. The first technique is to use &#34;early fusion&#34; to group multiple highly relevant tabular and textual units into a fused block, which provides more context for the retriever to search for. The second technique is to use a cross-block reader to model the cross-dependency between multiple retrieved evidence with global-local sparse attention. Combining these two techniques improves the score significantly, to above 27%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.10439v2-abstract-full').style.display = 'none'; document.getElementById('2010.10439v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ICLR 2021. Main paper has 9 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2007.00849">arXiv:2007.00849</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2007.00849">pdf</a>, <a href="https://arxiv.org/format/2007.00849">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Facts as Experts: Adaptable and Interpretable Neural Memory over Symbolic Knowledge </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Verga%2C+P">Pat Verga</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+H">Haitian Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Soares%2C+L+B">Livio Baldini Soares</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2007.00849v1-abstract-short" style="display: inline;"> Massive language models are the core of modern NLP modeling and have been shown to encode impressive amounts of commonsense and factual information. However, that knowledge exists only within the latent parameters of the model, inaccessible to inspection and interpretation, and even worse, factual information memorized from the training corpora is likely to become stale as the world changes. Knowl&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.00849v1-abstract-full').style.display = 'inline'; document.getElementById('2007.00849v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2007.00849v1-abstract-full" style="display: none;"> Massive language models are the core of modern NLP modeling and have been shown to encode impressive amounts of commonsense and factual information. However, that knowledge exists only within the latent parameters of the model, inaccessible to inspection and interpretation, and even worse, factual information memorized from the training corpora is likely to become stale as the world changes. Knowledge stored as parameters will also inevitably exhibit all of the biases inherent in the source materials. To address these problems, we develop a neural language model that includes an explicit interface between symbolically interpretable factual information and subsymbolic neural knowledge. We show that this model dramatically improves performance on two knowledge-intensive question-answering tasks. More interestingly, the model can be updated without re-training by manipulating its symbolic representations. In particular this model allows us to add new facts and overwrite existing ones in ways that are not possible for earlier models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.00849v1-abstract-full').style.display = 'none'; document.getElementById('2007.00849v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 July, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2004.12554">arXiv:2004.12554</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2004.12554">pdf</a>, <a href="https://arxiv.org/format/2004.12554">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Forecasting in Non-stationary Environments with Fuzzy Time Series </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Silva%2C+P+C+d+L+e">Petr么nio C芒ndido de Lima e Silva</a>, <a href="/search/cs?searchtype=author&amp;query=Junior%2C+C+A+S">Carlos Alberto Severiano Junior</a>, <a href="/search/cs?searchtype=author&amp;query=Alves%2C+M+A">Marcos Antonio Alves</a>, <a href="/search/cs?searchtype=author&amp;query=Silva%2C+R">Rodrigo Silva</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+M+W">Miri Weiss Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Guimar%C3%A3es%2C+F+G">Frederico Gadelha Guimar茫es</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2004.12554v1-abstract-short" style="display: inline;"> In this paper we introduce a Non-Stationary Fuzzy Time Series (NSFTS) method with time varying parameters adapted from the distribution of the data. In this approach, we employ Non-Stationary Fuzzy Sets, in which perturbation functions are used to adapt the membership function parameters in the knowledge base in response to statistical changes in the time series. The proposed method is capable of&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.12554v1-abstract-full').style.display = 'inline'; document.getElementById('2004.12554v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2004.12554v1-abstract-full" style="display: none;"> In this paper we introduce a Non-Stationary Fuzzy Time Series (NSFTS) method with time varying parameters adapted from the distribution of the data. In this approach, we employ Non-Stationary Fuzzy Sets, in which perturbation functions are used to adapt the membership function parameters in the knowledge base in response to statistical changes in the time series. The proposed method is capable of dynamically adapting its fuzzy sets to reflect the changes in the stochastic process based on the residual errors, without the need to retraining the model. This method can handle non-stationary and heteroskedastic data as well as scenarios with concept-drift. The proposed approach allows the model to be trained only once and remain useful long after while keeping reasonable accuracy. The flexibility of the method by means of computational experiments was tested with eight synthetic non-stationary time series data with several kinds of concept drifts, four real market indices (Dow Jones, NASDAQ, SP500 and TAIEX), three real FOREX pairs (EUR-USD, EUR-GBP, GBP-USD), and two real cryptocoins exchange rates (Bitcoin-USD and Ethereum-USD). As competitor models the Time Variant fuzzy time series and the Incremental Ensemble were used, these are two of the major approaches for handling non-stationary data sets. Non-parametric tests are employed to check the significance of the results. The proposed method shows resilience to concept drift, by adapting parameters of the model, while preserving the symbolic structure of the knowledge base. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.12554v1-abstract-full').style.display = 'none'; document.getElementById('2004.12554v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 April, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">21 pages, 7 figures, submitted to Applied Soft Computing</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2004.03658">arXiv:2004.03658</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2004.03658">pdf</a>, <a href="https://arxiv.org/format/2004.03658">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Faithful Embeddings for Knowledge Base Queries </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Sun%2C+H">Haitian Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Arnold%2C+A+O">Andrew O. Arnold</a>, <a href="/search/cs?searchtype=author&amp;query=Bedrax-Weiss%2C+T">Tania Bedrax-Weiss</a>, <a href="/search/cs?searchtype=author&amp;query=Pereira%2C+F">Fernando Pereira</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2004.03658v3-abstract-short" style="display: inline;"> The deductive closure of an ideal knowledge base (KB) contains exactly the logical queries that the KB can answer. However, in practice KBs are both incomplete and over-specified, failing to answer some queries that have real-world answers. \emph{Query embedding} (QE) techniques have been recently proposed where KB entities and KB queries are represented jointly in an embedding space, supporting r&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.03658v3-abstract-full').style.display = 'inline'; document.getElementById('2004.03658v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2004.03658v3-abstract-full" style="display: none;"> The deductive closure of an ideal knowledge base (KB) contains exactly the logical queries that the KB can answer. However, in practice KBs are both incomplete and over-specified, failing to answer some queries that have real-world answers. \emph{Query embedding} (QE) techniques have been recently proposed where KB entities and KB queries are represented jointly in an embedding space, supporting relaxation and generalization in KB inference. However, experiments in this paper show that QE systems may disagree with deductive reasoning on answers that do not require generalization or relaxation. We address this problem with a novel QE method that is more faithful to deductive reasoning, and show that this leads to better performance on complex queries to incomplete KBs. Finally we show that inserting this new QE module into a neural question-answering system leads to substantial improvements over the state-of-the-art. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.03658v3-abstract-full').style.display = 'none'; document.getElementById('2004.03658v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 January, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 April, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published at 34th Conference on Neural Information Processing Systems (NeurIPS 2020), Vancouver, Canada</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2002.10640">arXiv:2002.10640</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2002.10640">pdf</a>, <a href="https://arxiv.org/format/2002.10640">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Differentiable Reasoning over a Virtual Knowledge Base </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Zaheer%2C+M">Manzil Zaheer</a>, <a href="/search/cs?searchtype=author&amp;query=Balachandran%2C+V">Vidhisha Balachandran</a>, <a href="/search/cs?searchtype=author&amp;query=Neubig%2C+G">Graham Neubig</a>, <a href="/search/cs?searchtype=author&amp;query=Salakhutdinov%2C+R">Ruslan Salakhutdinov</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2002.10640v1-abstract-short" style="display: inline;"> We consider the task of answering complex multi-hop questions using a corpus as a virtual knowledge base (KB). In particular, we describe a neural module, DrKIT, that traverses textual data like a KB, softly following paths of relations between mentions of entities in the corpus. At each step the module uses a combination of sparse-matrix TFIDF indices and a maximum inner product search (MIPS) on&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.10640v1-abstract-full').style.display = 'inline'; document.getElementById('2002.10640v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2002.10640v1-abstract-full" style="display: none;"> We consider the task of answering complex multi-hop questions using a corpus as a virtual knowledge base (KB). In particular, we describe a neural module, DrKIT, that traverses textual data like a KB, softly following paths of relations between mentions of entities in the corpus. At each step the module uses a combination of sparse-matrix TFIDF indices and a maximum inner product search (MIPS) on a special index of contextual representations of the mentions. This module is differentiable, so the full system can be trained end-to-end using gradient based methods, starting from natural language inputs. We also describe a pretraining scheme for the contextual representation encoder by generating hard negative examples using existing knowledge bases. We show that DrKIT improves accuracy by 9 points on 3-hop questions in the MetaQA dataset, cutting the gap between text-based and KB-based state-of-the-art by 70%. On HotpotQA, DrKIT leads to a 10% improvement over a BERT-based re-ranking approach to retrieving the relevant passages required to answer a question. DrKIT is also very efficient, processing 10-100x more queries per second than existing multi-hop systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.10640v1-abstract-full').style.display = 'none'; document.getElementById('2002.10640v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2002.06115">arXiv:2002.06115</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2002.06115">pdf</a>, <a href="https://arxiv.org/format/2002.06115">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Scalable Neural Methods for Reasoning With a Symbolic Knowledge Base </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+H">Haitian Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Hofer%2C+R+A">R. Alex Hofer</a>, <a href="/search/cs?searchtype=author&amp;query=Siegler%2C+M">Matthew Siegler</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2002.06115v1-abstract-short" style="display: inline;"> We describe a novel way of representing a symbolic knowledge base (KB) called a sparse-matrix reified KB. This representation enables neural modules that are fully differentiable, faithful to the original semantics of the KB, expressive enough to model multi-hop inferences, and scalable enough to use with realistically large KBs. The sparse-matrix reified KB can be distributed across multiple GPUs&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.06115v1-abstract-full').style.display = 'inline'; document.getElementById('2002.06115v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2002.06115v1-abstract-full" style="display: none;"> We describe a novel way of representing a symbolic knowledge base (KB) called a sparse-matrix reified KB. This representation enables neural modules that are fully differentiable, faithful to the original semantics of the KB, expressive enough to model multi-hop inferences, and scalable enough to use with realistically large KBs. The sparse-matrix reified KB can be distributed across multiple GPUs, can scale to tens of millions of entities and facts, and is orders of magnitude faster than naive sparse-matrix implementations. The reified KB enables very simple end-to-end architectures to obtain competitive performance on several benchmarks representing two families of tasks: KB completion, and learning semantic parsers from denotations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.06115v1-abstract-full').style.display = 'none'; document.getElementById('2002.06115v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Also published in ICLR2020 https://openreview.net/forum?id=BJlguT4YPr&amp;noteId=BJlguT4YPr</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1912.06074">arXiv:1912.06074</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1912.06074">pdf</a>, <a href="https://arxiv.org/format/1912.06074">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Game Design for Eliciting Distinguishable Behavior </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+F">Fan Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Leqi%2C+L">Liu Leqi</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Y">Yifan Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Lipton%2C+Z+C">Zachary C. Lipton</a>, <a href="/search/cs?searchtype=author&amp;query=Ravikumar%2C+P">Pradeep Ravikumar</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Mitchell%2C+T">Tom Mitchell</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1912.06074v1-abstract-short" style="display: inline;"> The ability to inferring latent psychological traits from human behavior is key to developing personalized human-interacting machine learning systems. Approaches to infer such traits range from surveys to manually-constructed experiments and games. However, these traditional games are limited because they are typically designed based on heuristics. In this paper, we formulate the task of designing&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1912.06074v1-abstract-full').style.display = 'inline'; document.getElementById('1912.06074v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1912.06074v1-abstract-full" style="display: none;"> The ability to inferring latent psychological traits from human behavior is key to developing personalized human-interacting machine learning systems. Approaches to infer such traits range from surveys to manually-constructed experiments and games. However, these traditional games are limited because they are typically designed based on heuristics. In this paper, we formulate the task of designing \emph{behavior diagnostic games} that elicit distinguishable behavior as a mutual information maximization problem, which can be solved by optimizing a variational lower bound. Our framework is instantiated by using prospect theory to model varying player traits, and Markov Decision Processes to parameterize the games. We validate our approach empirically, showing that our designed games can successfully distinguish among players with different traits, outperforming manually-designed ones by a large margin. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1912.06074v1-abstract-full').style.display = 'none'; document.getElementById('1912.06074v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 December, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">33rd Conference on Neural Information Processing Systems (NeurIPS 2019)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1911.06111">arXiv:1911.06111</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1911.06111">pdf</a>, <a href="https://arxiv.org/format/1911.06111">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Instance-based Transfer Learning for Multilingual Deep Retrieval </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Arnold%2C+A+O">Andrew O. Arnold</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1911.06111v3-abstract-short" style="display: inline;"> We focus on the problem of search in the multilingual setting. Examining the problems of next-sentence prediction and inverse cloze, we show that at large scale, instance-based transfer learning is surprisingly effective in the multilingual setting, leading to positive transfer on all of the 35 target languages and two tasks tested. We analyze this improvement and argue that the most natural expla&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1911.06111v3-abstract-full').style.display = 'inline'; document.getElementById('1911.06111v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1911.06111v3-abstract-full" style="display: none;"> We focus on the problem of search in the multilingual setting. Examining the problems of next-sentence prediction and inverse cloze, we show that at large scale, instance-based transfer learning is surprisingly effective in the multilingual setting, leading to positive transfer on all of the 35 target languages and two tasks tested. We analyze this improvement and argue that the most natural explanation, namely direct vocabulary overlap between languages, only partially explains the performance gains: in fact, we demonstrate target-language improvement can occur after adding data from an auxiliary language even with no vocabulary in common with the target. This surprising result is due to the effect of transitive vocabulary overlaps between pairs of auxiliary and target languages. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1911.06111v3-abstract-full').style.display = 'none'; document.getElementById('1911.06111v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 November, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> The Web Conference Workshop on Multilingual Search, 2021 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1909.06146">arXiv:1909.06146</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1909.06146">pdf</a>, <a href="https://arxiv.org/format/1909.06146">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> PubMedQA: A Dataset for Biomedical Research Question Answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jin%2C+Q">Qiao Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Zhengping Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+X">Xinghua Lu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1909.06146v1-abstract-short" style="display: inline;"> We introduce PubMedQA, a novel biomedical question answering (QA) dataset collected from PubMed abstracts. The task of PubMedQA is to answer research questions with yes/no/maybe (e.g.: Do preoperative statins reduce atrial fibrillation after coronary artery bypass grafting?) using the corresponding abstracts. PubMedQA has 1k expert-annotated, 61.2k unlabeled and 211.3k artificially generated QA in&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1909.06146v1-abstract-full').style.display = 'inline'; document.getElementById('1909.06146v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1909.06146v1-abstract-full" style="display: none;"> We introduce PubMedQA, a novel biomedical question answering (QA) dataset collected from PubMed abstracts. The task of PubMedQA is to answer research questions with yes/no/maybe (e.g.: Do preoperative statins reduce atrial fibrillation after coronary artery bypass grafting?) using the corresponding abstracts. PubMedQA has 1k expert-annotated, 61.2k unlabeled and 211.3k artificially generated QA instances. Each PubMedQA instance is composed of (1) a question which is either an existing research article title or derived from one, (2) a context which is the corresponding abstract without its conclusion, (3) a long answer, which is the conclusion of the abstract and, presumably, answers the research question, and (4) a yes/no/maybe answer which summarizes the conclusion. PubMedQA is the first QA dataset where reasoning over biomedical research texts, especially their quantitative contents, is required to answer the questions. Our best performing model, multi-phase fine-tuning of BioBERT with long answer bag-of-word statistics as additional supervision, achieves 68.1% accuracy, compared to single human performance of 78.0% accuracy and majority-baseline of 55.2% accuracy, leaving much room for improvement. PubMedQA is publicly available at https://pubmedqa.github.io. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1909.06146v1-abstract-full').style.display = 'none'; document.getElementById('1909.06146v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 September, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP 2019</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1906.01081">arXiv:1906.01081</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1906.01081">pdf</a>, <a href="https://arxiv.org/format/1906.01081">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Handling Divergent Reference Texts when Evaluating Table-to-Text Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Faruqui%2C+M">Manaal Faruqui</a>, <a href="/search/cs?searchtype=author&amp;query=Parikh%2C+A">Ankur Parikh</a>, <a href="/search/cs?searchtype=author&amp;query=Chang%2C+M">Ming-Wei Chang</a>, <a href="/search/cs?searchtype=author&amp;query=Das%2C+D">Dipanjan Das</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1906.01081v1-abstract-short" style="display: inline;"> Automatically constructed datasets for generating text from semi-structured data (tables), such as WikiBio, often contain reference texts that diverge from the information in the corresponding semi-structured data. We show that metrics which rely solely on the reference texts, such as BLEU and ROUGE, show poor correlation with human judgments when those references diverge. We propose a new metric,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1906.01081v1-abstract-full').style.display = 'inline'; document.getElementById('1906.01081v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1906.01081v1-abstract-full" style="display: none;"> Automatically constructed datasets for generating text from semi-structured data (tables), such as WikiBio, often contain reference texts that diverge from the information in the corresponding semi-structured data. We show that metrics which rely solely on the reference texts, such as BLEU and ROUGE, show poor correlation with human judgments when those references diverge. We propose a new metric, PARENT, which aligns n-grams from the reference and generated texts to the semi-structured data before computing their precision and recall. Through a large scale human evaluation study of table-to-text models for WikiBio, we show that PARENT correlates with human judgments better than existing text generation metrics. We also adapt and evaluate the information extraction based evaluation proposed by Wiseman et al (2017), and show that PARENT has comparable correlation to it, while being easier to use. We show that PARENT is also applicable when the reference texts are elicited from humans using the data from the WebNLG challenge. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1906.01081v1-abstract-full').style.display = 'none'; document.getElementById('1906.01081v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 June, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear at ACL 2019</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1905.10417">arXiv:1905.10417</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1905.10417">pdf</a>, <a href="https://arxiv.org/format/1905.10417">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Differentiable Representations For Multihop Inference Rules </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+H">Haitian Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Hofer%2C+R+A">R. Alex Hofer</a>, <a href="/search/cs?searchtype=author&amp;query=Siegler%2C+M">Matthew Siegler</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1905.10417v1-abstract-short" style="display: inline;"> We present efficient differentiable implementations of second-order multi-hop reasoning using a large symbolic knowledge base (KB). We introduce a new operation which can be used to compositionally construct second-order multi-hop templates in a neural model, and evaluate a number of alternative implementations, with different time and memory trade offs. These techniques scale to KBs with millions&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1905.10417v1-abstract-full').style.display = 'inline'; document.getElementById('1905.10417v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1905.10417v1-abstract-full" style="display: none;"> We present efficient differentiable implementations of second-order multi-hop reasoning using a large symbolic knowledge base (KB). We introduce a new operation which can be used to compositionally construct second-order multi-hop templates in a neural model, and evaluate a number of alternative implementations, with different time and memory trade offs. These techniques scale to KBs with millions of entities and tens of millions of triples, and lead to simple models with competitive performance on several learning tasks requiring multi-hop reasoning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1905.10417v1-abstract-full').style.display = 'none'; document.getElementById('1905.10417v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 May, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1905.06209">arXiv:1905.06209</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1905.06209">pdf</a>, <a href="https://arxiv.org/format/1905.06209">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> </div> </div> <p class="title is-5 mathjax"> Neural Query Language: A Knowledge Base Query Language for Tensorflow </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Siegler%2C+M">Matthew Siegler</a>, <a href="/search/cs?searchtype=author&amp;query=Hofer%2C+A">Alex Hofer</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1905.06209v1-abstract-short" style="display: inline;"> Large knowledge bases (KBs) are useful for many AI tasks, but are difficult to integrate into modern gradient-based learning systems. Here we describe a framework for accessing soft symbolic database using only differentiable operators. For example, this framework makes it easy to conveniently write neural models that adjust confidences associated with facts in a soft KB; incorporate prior knowled&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1905.06209v1-abstract-full').style.display = 'inline'; document.getElementById('1905.06209v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1905.06209v1-abstract-full" style="display: none;"> Large knowledge bases (KBs) are useful for many AI tasks, but are difficult to integrate into modern gradient-based learning systems. Here we describe a framework for accessing soft symbolic database using only differentiable operators. For example, this framework makes it easy to conveniently write neural models that adjust confidences associated with facts in a soft KB; incorporate prior knowledge in the form of hand-coded KB access rules; or learn to instantiate query templates using information extracted from text. NQL can work well with KBs with millions of tuples and hundreds of thousands of entities on a single GPU. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1905.06209v1-abstract-full').style.display = 'none'; document.getElementById('1905.06209v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 May, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1904.09537">arXiv:1904.09537</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1904.09537">pdf</a>, <a href="https://arxiv.org/format/1904.09537">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> PullNet: Open Domain Question Answering with Iterative Retrieval on Knowledge Bases and Text </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Sun%2C+H">Haitian Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Bedrax-Weiss%2C+T">Tania Bedrax-Weiss</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1904.09537v1-abstract-short" style="display: inline;"> We consider open-domain queston answering (QA) where answers are drawn from either a corpus, a knowledge base (KB), or a combination of both of these. We focus on a setting in which a corpus is supplemented with a large but incomplete KB, and on questions that require non-trivial (e.g., ``multi-hop&#39;&#39;) reasoning. We describe PullNet, an integrated framework for (1) learning what to retrieve (from t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1904.09537v1-abstract-full').style.display = 'inline'; document.getElementById('1904.09537v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1904.09537v1-abstract-full" style="display: none;"> We consider open-domain queston answering (QA) where answers are drawn from either a corpus, a knowledge base (KB), or a combination of both of these. We focus on a setting in which a corpus is supplemented with a large but incomplete KB, and on questions that require non-trivial (e.g., ``multi-hop&#39;&#39;) reasoning. We describe PullNet, an integrated framework for (1) learning what to retrieve (from the KB and/or corpus) and (2) reasoning with this heterogeneous information to find the best answer. PullNet uses an {iterative} process to construct a question-specific subgraph that contains information relevant to the question. In each iteration, a graph convolutional network (graph CNN) is used to identify subgraph nodes that should be expanded using retrieval (or ``pull&#39;&#39;) operations on the corpus and/or KB. After the subgraph is complete, a similar graph CNN is used to extract the answer from the subgraph. This retrieve-and-reason process allows us to answer multi-hop questions using large KBs and corpora. PullNet is weakly supervised, requiring question-answer pairs but not gold inference paths. Experimentally PullNet improves over the prior state-of-the art, and in the setting where a corpus is used with incomplete KB these improvements are often dramatic. PullNet is also often superior to prior systems in a KB-only setting or a text-only setting. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1904.09537v1-abstract-full').style.display = 'none'; document.getElementById('1904.09537v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 April, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2019. </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Cohen%2C+W&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Cohen%2C+W&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Cohen%2C+W&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10