CINXE.COM

Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 56 results for author: <span class="mathjax">Dhingra, B</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Dhingra%2C+B">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Dhingra, B"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Dhingra%2C+B&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Dhingra, B"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Dhingra%2C+B&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Dhingra%2C+B&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Dhingra%2C+B&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.20494">arXiv:2410.20494</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.20494">pdf</a>, <a href="https://arxiv.org/format/2410.20494">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> MatViX: Multimodal Information Extraction from Visually Rich Articles </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Khalighinejad%2C+G">Ghazal Khalighinejad</a>, <a href="/search/cs?searchtype=author&amp;query=Scott%2C+S">Sharon Scott</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+O">Ollie Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Anderson%2C+K+L">Kelly L. Anderson</a>, <a href="/search/cs?searchtype=author&amp;query=Stureborg%2C+R">Rickard Stureborg</a>, <a href="/search/cs?searchtype=author&amp;query=Tyagi%2C+A">Aman Tyagi</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.20494v1-abstract-short" style="display: inline;"> Multimodal information extraction (MIE) is crucial for scientific literature, where valuable data is often spread across text, figures, and tables. In materials science, extracting structured information from research articles can accelerate the discovery of new materials. However, the multimodal nature and complex interconnections of scientific content present challenges for traditional text-base&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20494v1-abstract-full').style.display = 'inline'; document.getElementById('2410.20494v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.20494v1-abstract-full" style="display: none;"> Multimodal information extraction (MIE) is crucial for scientific literature, where valuable data is often spread across text, figures, and tables. In materials science, extracting structured information from research articles can accelerate the discovery of new materials. However, the multimodal nature and complex interconnections of scientific content present challenges for traditional text-based methods. We introduce \textsc{MatViX}, a benchmark consisting of $324$ full-length research articles and $1,688$ complex structured JSON files, carefully curated by domain experts. These JSON files are extracted from text, tables, and figures in full-length documents, providing a comprehensive challenge for MIE. We introduce an evaluation method to assess the accuracy of curve similarity and the alignment of hierarchical structures. Additionally, we benchmark vision-language models (VLMs) in a zero-shot manner, capable of processing long contexts and multimodal inputs, and show that using a specialized model (DePlot) can improve performance in extracting curves. Our results demonstrate significant room for improvement in current models. Our dataset and evaluation code are available\footnote{\url{https://matvix-bench.github.io/}}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20494v1-abstract-full').style.display = 'none'; document.getElementById('2410.20494v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.14675">arXiv:2410.14675</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.14675">pdf</a>, <a href="https://arxiv.org/format/2410.14675">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Large Language Models&#39; Situated Faithfulness to External Contexts </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Huang%2C+Y">Yukun Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+S">Sanxing Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Cai%2C+H">Hongyi Cai</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.14675v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) are often augmented with external information as contexts, but this external information can sometimes be inaccurate or even intentionally misleading. We argue that robust LLMs should demonstrate situated faithfulness, dynamically calibrating their trust in external information based on their confidence in the internal knowledge and the external context. To benchmark t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14675v1-abstract-full').style.display = 'inline'; document.getElementById('2410.14675v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.14675v1-abstract-full" style="display: none;"> Large Language Models (LLMs) are often augmented with external information as contexts, but this external information can sometimes be inaccurate or even intentionally misleading. We argue that robust LLMs should demonstrate situated faithfulness, dynamically calibrating their trust in external information based on their confidence in the internal knowledge and the external context. To benchmark this capability, we evaluate LLMs across several QA datasets, including a newly created dataset called RedditQA featuring in-the-wild incorrect contexts sourced from Reddit posts. We show that when provided with both correct and incorrect contexts, both open-source and proprietary models tend to overly rely on external information, regardless of its factual accuracy. To enhance situated faithfulness, we propose two approaches: Self-Guided Confidence Reasoning (SCR) and Rule-Based Confidence Reasoning (RCR). SCR enables models to self-access the confidence of external information relative to their own internal knowledge to produce the most accurate answer. RCR, in contrast, extracts explicit confidence signals from the LLM and determines the final answer using predefined rules. Our results show that for LLMs with strong reasoning capabilities, such as GPT-4o and GPT-4o mini, SCR outperforms RCR, achieving improvements of up to 24.2% over a direct input augmentation baseline. Conversely, for a smaller model like Llama-3-8B, RCR outperforms SCR. Fine-tuning SCR with our proposed Confidence Reasoning Direct Preference Optimization (CR-DPO) method improves performance on both seen and unseen datasets, yielding an average improvement of 8.9% on Llama-3-8B. In addition to quantitative results, we offer insights into the relative strengths of SCR and RCR. Our findings highlight promising avenues for improving situated faithfulness in LLMs. The data and code are released. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14675v1-abstract-full').style.display = 'none'; document.getElementById('2410.14675v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.14651">arXiv:2410.14651</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.14651">pdf</a>, <a href="https://arxiv.org/format/2410.14651">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Real-time Fake News from Adversarial Feedback </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+S">Sanxing Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+Y">Yukun Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.14651v1-abstract-short" style="display: inline;"> We show that existing evaluations for fake news detection based on conventional sources, such as claims on fact-checking websites, result in an increasing accuracy over time for LLM-based detectors -- even after their knowledge cutoffs. This suggests that recent popular political claims, which form the majority of fake news on such sources, are easily classified using surface-level shallow pattern&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14651v1-abstract-full').style.display = 'inline'; document.getElementById('2410.14651v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.14651v1-abstract-full" style="display: none;"> We show that existing evaluations for fake news detection based on conventional sources, such as claims on fact-checking websites, result in an increasing accuracy over time for LLM-based detectors -- even after their knowledge cutoffs. This suggests that recent popular political claims, which form the majority of fake news on such sources, are easily classified using surface-level shallow patterns. Instead, we argue that a proper fake news detection dataset should test a model&#39;s ability to reason factually about the current world by retrieving and reading related evidence. To this end, we develop a novel pipeline that leverages natural language feedback from a RAG-based detector to iteratively modify real-time news into deceptive fake news that challenges LLMs. Our iterative rewrite decreases the binary classification AUC by an absolute 17.5 percent for a strong RAG GPT-4o detector. Our experiments reveal the important role of RAG in both detecting and generating fake news, as retrieval-free LLM detectors are vulnerable to unseen events and adversarial attacks, while feedback from RAG detection helps discover more deceitful patterns in fake news. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14651v1-abstract-full').style.display = 'none'; document.getElementById('2410.14651v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.14635">arXiv:2410.14635</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.14635">pdf</a>, <a href="https://arxiv.org/format/2410.14635">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> GenEOL: Harnessing the Generative Power of LLMs for Training-Free Sentence Embeddings </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Thirukovalluru%2C+R">Raghuveer Thirukovalluru</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.14635v1-abstract-short" style="display: inline;"> Training-free embedding methods directly leverage pretrained large language models (LLMs) to embed text, bypassing the costly and complex procedure of contrastive learning. Previous training-free embedding methods have mainly focused on optimizing embedding prompts and have overlooked the benefits of utilizing the generative abilities of LLMs. We propose a novel method, GenEOL, which uses LLMs to&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14635v1-abstract-full').style.display = 'inline'; document.getElementById('2410.14635v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.14635v1-abstract-full" style="display: none;"> Training-free embedding methods directly leverage pretrained large language models (LLMs) to embed text, bypassing the costly and complex procedure of contrastive learning. Previous training-free embedding methods have mainly focused on optimizing embedding prompts and have overlooked the benefits of utilizing the generative abilities of LLMs. We propose a novel method, GenEOL, which uses LLMs to generate diverse transformations of a sentence that preserve its meaning, and aggregates the resulting embeddings of these transformations to enhance the overall sentence embedding. GenEOL significantly outperforms the existing training-free embedding methods by an average of 2.85 points across several LLMs on the sentence semantic text similarity (STS) benchmark. Our analysis shows that GenEOL stabilizes representation quality across LLM layers and is robust to perturbations of embedding prompts. GenEOL also achieves notable gains on multiple clustering, reranking and pair-classification tasks from the MTEB benchmark. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14635v1-abstract-full').style.display = 'none'; document.getElementById('2410.14635v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.12656">arXiv:2410.12656</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.12656">pdf</a>, <a href="https://arxiv.org/format/2410.12656">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Evaluating Morphological Compositional Generalization in Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ismayilzada%2C+M">Mete Ismayilzada</a>, <a href="/search/cs?searchtype=author&amp;query=Circi%2C+D">Defne Circi</a>, <a href="/search/cs?searchtype=author&amp;query=S%C3%A4lev%C3%A4%2C+J">Jonne S盲lev盲</a>, <a href="/search/cs?searchtype=author&amp;query=Sirin%2C+H">Hale Sirin</a>, <a href="/search/cs?searchtype=author&amp;query=K%C3%B6ksal%2C+A">Abdullatif K枚ksal</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Bosselut%2C+A">Antoine Bosselut</a>, <a href="/search/cs?searchtype=author&amp;query=van+der+Plas%2C+L">Lonneke van der Plas</a>, <a href="/search/cs?searchtype=author&amp;query=Ataman%2C+D">Duygu Ataman</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.12656v2-abstract-short" style="display: inline;"> Large language models (LLMs) have demonstrated significant progress in various natural language generation and understanding tasks. However, their linguistic generalization capabilities remain questionable, raising doubts about whether these models learn language similarly to humans. While humans exhibit compositional generalization and linguistic creativity in language use, the extent to which LL&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12656v2-abstract-full').style.display = 'inline'; document.getElementById('2410.12656v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.12656v2-abstract-full" style="display: none;"> Large language models (LLMs) have demonstrated significant progress in various natural language generation and understanding tasks. However, their linguistic generalization capabilities remain questionable, raising doubts about whether these models learn language similarly to humans. While humans exhibit compositional generalization and linguistic creativity in language use, the extent to which LLMs replicate these abilities, particularly in morphology, is under-explored. In this work, we systematically investigate the morphological generalization abilities of LLMs through the lens of compositionality. We define morphemes as compositional primitives and design a novel suite of generative and discriminative tasks to assess morphological productivity and systematicity. Focusing on agglutinative languages such as Turkish and Finnish, we evaluate several state-of-the-art instruction-finetuned multilingual models, including GPT-4 and Gemini. Our analysis shows that LLMs struggle with morphological compositional generalization particularly when applied to novel word roots, with performance declining sharply as morphological complexity increases. While models can identify individual morphological combinations better than chance, their performance lacks systematicity, leading to significant accuracy gaps compared to humans. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12656v2-abstract-full').style.display = 'none'; document.getElementById('2410.12656v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">33 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.15968">arXiv:2406.15968</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.15968">pdf</a>, <a href="https://arxiv.org/format/2406.15968">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> ReCaLL: Membership Inference via Relative Conditional Log-Likelihoods </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xie%2C+R">Roy Xie</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Junlin Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+R">Ruomin Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+M">Minxing Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Ge%2C+R">Rong Ge</a>, <a href="/search/cs?searchtype=author&amp;query=Pei%2C+J">Jian Pei</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+N+Z">Neil Zhenqiang Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.15968v1-abstract-short" style="display: inline;"> The rapid scaling of large language models (LLMs) has raised concerns about the transparency and fair use of the pretraining data used for training them. Detecting such content is challenging due to the scale of the data and limited exposure of each instance during training. We propose ReCaLL (Relative Conditional Log-Likelihood), a novel membership inference attack (MIA) to detect LLMs&#39; pretraini&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.15968v1-abstract-full').style.display = 'inline'; document.getElementById('2406.15968v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.15968v1-abstract-full" style="display: none;"> The rapid scaling of large language models (LLMs) has raised concerns about the transparency and fair use of the pretraining data used for training them. Detecting such content is challenging due to the scale of the data and limited exposure of each instance during training. We propose ReCaLL (Relative Conditional Log-Likelihood), a novel membership inference attack (MIA) to detect LLMs&#39; pretraining data by leveraging their conditional language modeling capabilities. ReCaLL examines the relative change in conditional log-likelihoods when prefixing target data points with non-member context. Our empirical findings show that conditioning member data on non-member prefixes induces a larger decrease in log-likelihood compared to non-member data. We conduct comprehensive experiments and show that ReCaLL achieves state-of-the-art performance on the WikiMIA dataset, even with random and synthetic prefixes, and can be further improved using an ensemble approach. Moreover, we conduct an in-depth analysis of LLMs&#39; behavior with different membership contexts, providing insights into how LLMs leverage membership information for effective inference at both the sequence and token level. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.15968v1-abstract-full').style.display = 'none'; document.getElementById('2406.15968v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.06737">arXiv:2406.06737</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.06737">pdf</a>, <a href="https://arxiv.org/format/2406.06737">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.18653/v1/2024.findings-acl.791">10.18653/v1/2024.findings-acl.791 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Raccoon: Prompt Extraction Benchmark of LLM-Integrated Applications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Junlin Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+T">Tianyi Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Xie%2C+R">Roy Xie</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.06737v2-abstract-short" style="display: inline;"> With the proliferation of LLM-integrated applications such as GPT-s, millions are deployed, offering valuable services through proprietary instruction prompts. These systems, however, are prone to prompt extraction attacks through meticulously designed queries. To help mitigate this problem, we introduce the Raccoon benchmark which comprehensively evaluates a model&#39;s susceptibility to prompt extra&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.06737v2-abstract-full').style.display = 'inline'; document.getElementById('2406.06737v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.06737v2-abstract-full" style="display: none;"> With the proliferation of LLM-integrated applications such as GPT-s, millions are deployed, offering valuable services through proprietary instruction prompts. These systems, however, are prone to prompt extraction attacks through meticulously designed queries. To help mitigate this problem, we introduce the Raccoon benchmark which comprehensively evaluates a model&#39;s susceptibility to prompt extraction attacks. Our novel evaluation method assesses models under both defenseless and defended scenarios, employing a dual approach to evaluate the effectiveness of existing defenses and the resilience of the models. The benchmark encompasses 14 categories of prompt extraction attacks, with additional compounded attacks that closely mimic the strategies of potential attackers, alongside a diverse collection of defense templates. This array is, to our knowledge, the most extensive compilation of prompt theft attacks and defense mechanisms to date. Our findings highlight universal susceptibility to prompt theft in the absence of defenses, with OpenAI models demonstrating notable resilience when protected. This paper aims to establish a more systematic benchmark for assessing LLM robustness against prompt extraction attacks, offering insights into their causes and potential countermeasures. Resources of Raccoon are publicly available at https://github.com/M0gician/RaccoonBench. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.06737v2-abstract-full').style.display = 'none'; document.getElementById('2406.06737v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ACL 2024 Findings</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.04291">arXiv:2406.04291</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.04291">pdf</a>, <a href="https://arxiv.org/format/2406.04291">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Stratified Prediction-Powered Inference for Hybrid Language Model Evaluation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Fisch%2C+A">Adam Fisch</a>, <a href="/search/cs?searchtype=author&amp;query=Maynez%2C+J">Joshua Maynez</a>, <a href="/search/cs?searchtype=author&amp;query=Hofer%2C+R+A">R. Alex Hofer</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Globerson%2C+A">Amir Globerson</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.04291v1-abstract-short" style="display: inline;"> Prediction-powered inference (PPI) is a method that improves statistical estimates based on limited human-labeled data. PPI achieves this by combining small amounts of human-labeled data with larger amounts of data labeled by a reasonably accurate -- but potentially biased -- automatic system, in a way that results in tighter confidence intervals for certain parameters of interest (e.g., the mean&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.04291v1-abstract-full').style.display = 'inline'; document.getElementById('2406.04291v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.04291v1-abstract-full" style="display: none;"> Prediction-powered inference (PPI) is a method that improves statistical estimates based on limited human-labeled data. PPI achieves this by combining small amounts of human-labeled data with larger amounts of data labeled by a reasonably accurate -- but potentially biased -- automatic system, in a way that results in tighter confidence intervals for certain parameters of interest (e.g., the mean performance of a language model). In this paper, we propose a method called Stratified Prediction-Powered Inference (StratPPI), in which we show that the basic PPI estimates can be considerably improved by employing simple data stratification strategies. Without making any assumptions on the underlying automatic labeling system or data distribution, we derive an algorithm for computing provably valid confidence intervals for population parameters (such as averages) that is based on stratified sampling. In particular, we show both theoretically and empirically that, with appropriate choices of stratification and sample allocation, our approach can provide substantially tighter confidence intervals than unstratified approaches. Specifically, StratPPI is expected to improve in cases where the performance of the autorater varies across different conditional distributions of the target data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.04291v1-abstract-full').style.display = 'none'; document.getElementById('2406.04291v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.13131">arXiv:2405.13131</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.13131">pdf</a>, <a href="https://arxiv.org/format/2405.13131">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Atomic Self-Consistency for Better Long Form Generations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Thirukovalluru%2C+R">Raghuveer Thirukovalluru</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+Y">Yukun Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.13131v1-abstract-short" style="display: inline;"> Recent work has aimed to improve LLM generations by filtering out hallucinations, thereby improving the precision of the information in responses. Correctness of a long-form response, however, also depends on the recall of multiple pieces of information relevant to the question. In this paper, we introduce Atomic Self-Consistency (ASC), a technique for improving the recall of relevant information&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.13131v1-abstract-full').style.display = 'inline'; document.getElementById('2405.13131v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.13131v1-abstract-full" style="display: none;"> Recent work has aimed to improve LLM generations by filtering out hallucinations, thereby improving the precision of the information in responses. Correctness of a long-form response, however, also depends on the recall of multiple pieces of information relevant to the question. In this paper, we introduce Atomic Self-Consistency (ASC), a technique for improving the recall of relevant information in an LLM response. ASC follows recent work, Universal Self-Consistency (USC) in using multiple stochastic samples from an LLM to improve the long-form response. Unlike USC which only focuses on selecting the best single generation, ASC picks authentic subparts from the samples and merges them into a superior composite answer. Through extensive experiments and ablations, we show that merging relevant subparts of multiple samples performs significantly better than picking a single sample. ASC demonstrates significant gains over USC on multiple factoids and open-ended QA datasets - ASQA, QAMPARI, QUEST, ELI5 with ChatGPT and Llama2. Our analysis also reveals untapped potential for enhancing long-form generations using approach of merging multiple samples. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.13131v1-abstract-full').style.display = 'none'; document.getElementById('2405.13131v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.10861">arXiv:2405.10861</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.10861">pdf</a>, <a href="https://arxiv.org/format/2405.10861">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Tailoring Vaccine Messaging with Common-Ground Opinions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Stureborg%2C+R">Rickard Stureborg</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+S">Sanxing Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Xie%2C+R">Ruoyu Xie</a>, <a href="/search/cs?searchtype=author&amp;query=Patel%2C+A">Aayushi Patel</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+C">Christopher Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+C+Q">Chloe Qinyu Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+T">Tingnan Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jun Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.10861v2-abstract-short" style="display: inline;"> One way to personalize chatbot interactions is by establishing common ground with the intended reader. A domain where establishing mutual understanding could be particularly impactful is vaccine concerns and misinformation. Vaccine interventions are forms of messaging which aim to answer concerns expressed about vaccination. Tailoring responses in this domain is difficult, since opinions often hav&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.10861v2-abstract-full').style.display = 'inline'; document.getElementById('2405.10861v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.10861v2-abstract-full" style="display: none;"> One way to personalize chatbot interactions is by establishing common ground with the intended reader. A domain where establishing mutual understanding could be particularly impactful is vaccine concerns and misinformation. Vaccine interventions are forms of messaging which aim to answer concerns expressed about vaccination. Tailoring responses in this domain is difficult, since opinions often have seemingly little ideological overlap. We define the task of tailoring vaccine interventions to a Common-Ground Opinion (CGO). Tailoring responses to a CGO involves meaningfully improving the answer by relating it to an opinion or belief the reader holds. In this paper we introduce TAILOR-CGO, a dataset for evaluating how well responses are tailored to provided CGOs. We benchmark several major LLMs on this task; finding GPT-4-Turbo performs significantly better than others. We also build automatic evaluation metrics, including an efficient and accurate BERT model that outperforms finetuned LLMs, investigate how to successfully tailor vaccine messaging to CGOs, and provide actionable recommendations from this investigation. Code and model weights: https://github.com/rickardstureborg/tailor-cgo Dataset: https://huggingface.co/datasets/DukeNLP/tailor-cgo <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.10861v2-abstract-full').style.display = 'none'; document.getElementById('2405.10861v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NAACL Findings 2024</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68T50 (Primary) 68T01; 68T37; 91F20 (Secondary) <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2; I.2.7; I.7 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.06034">arXiv:2405.06034</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.06034">pdf</a>, <a href="https://arxiv.org/format/2405.06034">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Bayesian Prediction-Powered Inference </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Hofer%2C+R+A">R. Alex Hofer</a>, <a href="/search/cs?searchtype=author&amp;query=Maynez%2C+J">Joshua Maynez</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Fisch%2C+A">Adam Fisch</a>, <a href="/search/cs?searchtype=author&amp;query=Globerson%2C+A">Amir Globerson</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.06034v1-abstract-short" style="display: inline;"> Prediction-powered inference (PPI) is a method that improves statistical estimates based on limited human-labeled data. Specifically, PPI methods provide tighter confidence intervals by combining small amounts of human-labeled data with larger amounts of data labeled by a reasonably accurate, but potentially biased, automatic system. We propose a framework for PPI based on Bayesian inference that&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.06034v1-abstract-full').style.display = 'inline'; document.getElementById('2405.06034v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.06034v1-abstract-full" style="display: none;"> Prediction-powered inference (PPI) is a method that improves statistical estimates based on limited human-labeled data. Specifically, PPI methods provide tighter confidence intervals by combining small amounts of human-labeled data with larger amounts of data labeled by a reasonably accurate, but potentially biased, automatic system. We propose a framework for PPI based on Bayesian inference that allows researchers to develop new task-appropriate PPI methods easily. Exploiting the ease with which we can design new metrics, we propose improved PPI methods for several importantcases, such as autoraters that give discrete responses (e.g., prompted LLM ``judges&#39;&#39;) and autoraters with scores that have a non-linear relationship to human scores. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.06034v1-abstract-full').style.display = 'none'; document.getElementById('2405.06034v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.09911">arXiv:2404.09911</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.09911">pdf</a>, <a href="https://arxiv.org/format/2404.09911">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> ChatShop: Interactive Information Seeking with Language Agents </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+S">Sanxing Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Wiseman%2C+S">Sam Wiseman</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.09911v2-abstract-short" style="display: inline;"> The desire and ability to seek new information strategically are fundamental to human learning but often overlooked in current language agent evaluation. We analyze a popular web shopping task designed to test language agents&#39; ability to perform strategic exploration and discover that it can be reformulated and solved as a single-turn retrieval task without the need for interactive information see&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.09911v2-abstract-full').style.display = 'inline'; document.getElementById('2404.09911v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.09911v2-abstract-full" style="display: none;"> The desire and ability to seek new information strategically are fundamental to human learning but often overlooked in current language agent evaluation. We analyze a popular web shopping task designed to test language agents&#39; ability to perform strategic exploration and discover that it can be reformulated and solved as a single-turn retrieval task without the need for interactive information seeking. This finding encourages us to rethink realistic constraints on information access that would necessitate strategic information seeking. We then redesign the task to introduce a notion of task ambiguity and the role of a shopper, serving as a dynamic party with whom the agent strategically interacts in an open-ended conversation to make informed decisions. Our experiments demonstrate that the proposed task can effectively evaluate the agent&#39;s ability to explore and gradually accumulate information through multi-turn interactions. Additionally, we show that large language model-simulated shoppers serve as a good proxy for real human shoppers, revealing similar error patterns in agents. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.09911v2-abstract-full').style.display = 'none'; document.getElementById('2404.09911v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.01266">arXiv:2404.01266</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.01266">pdf</a>, <a href="https://arxiv.org/format/2404.01266">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> IsoBench: Benchmarking Multimodal Foundation Models on Isomorphic Representations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Fu%2C+D">Deqing Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+R">Ruohao Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Khalighinejad%2C+G">Ghazal Khalighinejad</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+O">Ollie Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Yogatama%2C+D">Dani Yogatama</a>, <a href="/search/cs?searchtype=author&amp;query=Jia%2C+R">Robin Jia</a>, <a href="/search/cs?searchtype=author&amp;query=Neiswanger%2C+W">Willie Neiswanger</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.01266v3-abstract-short" style="display: inline;"> Current foundation models exhibit impressive capabilities when prompted either with text only or with both image and text inputs. But do their capabilities change depending on the input modality? In this work, we propose $\textbf{IsoBench}$, a benchmark dataset containing problems from four major areas: math, science, algorithms, and games. Each example is presented with multiple&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.01266v3-abstract-full').style.display = 'inline'; document.getElementById('2404.01266v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.01266v3-abstract-full" style="display: none;"> Current foundation models exhibit impressive capabilities when prompted either with text only or with both image and text inputs. But do their capabilities change depending on the input modality? In this work, we propose $\textbf{IsoBench}$, a benchmark dataset containing problems from four major areas: math, science, algorithms, and games. Each example is presented with multiple $\textbf{isomorphic representations}$ of inputs, such as visual, textual, and mathematical presentations. IsoBench provides fine-grained feedback to diagnose performance gaps caused by the form of the representation. Across various foundation models, we observe that on the same problem, models have a consistent preference towards textual representations. Most prominently, when evaluated on all IsoBench problems, Claude-3 Opus performs 28.7 points worse when provided with images instead of text; similarly, GPT-4 Turbo is 18.7 points worse and Gemini Pro is 14.9 points worse. Finally, we present two prompting techniques, $\textit{IsoCombination}$ and $\textit{IsoScratchPad}$, which improve model performance by considering combinations of, and translations between, different input representations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.01266v3-abstract-full').style.display = 'none'; document.getElementById('2404.01266v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">1st Conference on Language Modeling (COLM), 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.00260">arXiv:2403.00260</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.00260">pdf</a>, <a href="https://arxiv.org/format/2403.00260">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Extracting Polymer Nanocomposite Samples from Full-Length Documents </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Khalighinejad%2C+G">Ghazal Khalighinejad</a>, <a href="/search/cs?searchtype=author&amp;query=Circi%2C+D">Defne Circi</a>, <a href="/search/cs?searchtype=author&amp;query=Brinson%2C+L+C">L. C. Brinson</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.00260v1-abstract-short" style="display: inline;"> This paper investigates the use of large language models (LLMs) for extracting sample lists of polymer nanocomposites (PNCs) from full-length materials science research papers. The challenge lies in the complex nature of PNC samples, which have numerous attributes scattered throughout the text. The complexity of annotating detailed information on PNCs limits the availability of data, making conven&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.00260v1-abstract-full').style.display = 'inline'; document.getElementById('2403.00260v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.00260v1-abstract-full" style="display: none;"> This paper investigates the use of large language models (LLMs) for extracting sample lists of polymer nanocomposites (PNCs) from full-length materials science research papers. The challenge lies in the complex nature of PNC samples, which have numerous attributes scattered throughout the text. The complexity of annotating detailed information on PNCs limits the availability of data, making conventional document-level relation extraction techniques impractical due to the challenge in creating comprehensive named entity span annotations. To address this, we introduce a new benchmark and an evaluation technique for this task and explore different prompting strategies in a zero-shot manner. We also incorporate self-consistency to improve the performance. Our findings show that even advanced LLMs struggle to extract all of the samples from an article. Finally, we analyze the errors encountered in this process, categorizing them into three main challenges, and discuss potential strategies for future research to overcome them. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.00260v1-abstract-full').style.display = 'none'; document.getElementById('2403.00260v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.17916">arXiv:2402.17916</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.17916">pdf</a>, <a href="https://arxiv.org/format/2402.17916">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Adversarial Math Word Problem Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xie%2C+R">Roy Xie</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+C">Chengxuan Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Junlin Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.17916v3-abstract-short" style="display: inline;"> Large language models (LLMs) have significantly transformed the educational landscape. As current plagiarism detection tools struggle to keep pace with LLMs&#39; rapid advancements, the educational community faces the challenge of assessing students&#39; true problem-solving abilities in the presence of LLMs. In this work, we explore a new paradigm for ensuring fair evaluation -- generating adversarial ex&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.17916v3-abstract-full').style.display = 'inline'; document.getElementById('2402.17916v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.17916v3-abstract-full" style="display: none;"> Large language models (LLMs) have significantly transformed the educational landscape. As current plagiarism detection tools struggle to keep pace with LLMs&#39; rapid advancements, the educational community faces the challenge of assessing students&#39; true problem-solving abilities in the presence of LLMs. In this work, we explore a new paradigm for ensuring fair evaluation -- generating adversarial examples which preserve the structure and difficulty of the original questions aimed for assessment, but are unsolvable by LLMs. Focusing on the domain of math word problems, we leverage abstract syntax trees to structurally generate adversarial examples that cause LLMs to produce incorrect answers by simply editing the numeric values in the problems. We conduct experiments on various open- and closed-source LLMs, quantitatively and qualitatively demonstrating that our method significantly degrades their math problem-solving ability. We identify shared vulnerabilities among LLMs and propose a cost-effective approach to attack high-cost models. Additionally, we conduct automatic analysis to investigate the cause of failure, providing further insights into the limitations of LLMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.17916v3-abstract-full').style.display = 'none'; document.getElementById('2402.17916v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Code/data: https://github.com/ruoyuxie/adversarial_mwps_generation</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.06544">arXiv:2402.06544</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.06544">pdf</a>, <a href="https://arxiv.org/format/2402.06544">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Calibrating Long-form Generations from Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Huang%2C+Y">Yukun Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Yixin Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Thirukovalluru%2C+R">Raghuveer Thirukovalluru</a>, <a href="/search/cs?searchtype=author&amp;query=Cohan%2C+A">Arman Cohan</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.06544v2-abstract-short" style="display: inline;"> To enhance Large Language Models&#39; (LLMs) reliability, calibration is essential -- the model&#39;s assessed confidence scores should align with the actual likelihood of its responses being correct. However, current confidence elicitation methods and calibration metrics typically rely on a binary true/false assessment of response correctness. This approach does not apply to long-form generation, where a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.06544v2-abstract-full').style.display = 'inline'; document.getElementById('2402.06544v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.06544v2-abstract-full" style="display: none;"> To enhance Large Language Models&#39; (LLMs) reliability, calibration is essential -- the model&#39;s assessed confidence scores should align with the actual likelihood of its responses being correct. However, current confidence elicitation methods and calibration metrics typically rely on a binary true/false assessment of response correctness. This approach does not apply to long-form generation, where an answer can be partially correct. Addressing this gap, we introduce a unified calibration framework, in which both the correctness of the LLMs&#39; responses and their associated confidence levels are treated as distributions across a range of scores. Within this framework, we develop three metrics to precisely evaluate LLM calibration and further propose two confidence elicitation methods based on self-consistency and self-evaluation. Our experiments, which include long-form QA and summarization tasks, demonstrate that larger models don&#39;t necessarily guarantee better calibration, that calibration performance is found to be metric-dependent, and that self-consistency methods excel in factoid datasets. We also find that calibration can be enhanced through techniques such as fine-tuning, integrating relevant source documents, scaling the temperature, and combining self-consistency with self-evaluation. Lastly, we showcase a practical application of our system: selecting and cascading open-source models and ChatGPT to optimize correctness given a limited API budget. This research not only challenges existing notions of LLM calibration but also offers practical methodologies for improving trustworthiness in long-form generation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.06544v2-abstract-full').style.display = 'none'; document.getElementById('2402.06544v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.01783">arXiv:2402.01783</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.01783">pdf</a>, <a href="https://arxiv.org/format/2402.01783">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Hierarchical Multi-Label Classification of Online Vaccine Concerns </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+C+Q">Chloe Qinyu Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Stureborg%2C+R">Rickard Stureborg</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.01783v1-abstract-short" style="display: inline;"> Vaccine concerns are an ever-evolving target, and can shift quickly as seen during the COVID-19 pandemic. Identifying longitudinal trends in vaccine concerns and misinformation might inform the healthcare space by helping public health efforts strategically allocate resources or information campaigns. We explore the task of detecting vaccine concerns in online discourse using large language models&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.01783v1-abstract-full').style.display = 'inline'; document.getElementById('2402.01783v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.01783v1-abstract-full" style="display: none;"> Vaccine concerns are an ever-evolving target, and can shift quickly as seen during the COVID-19 pandemic. Identifying longitudinal trends in vaccine concerns and misinformation might inform the healthcare space by helping public health efforts strategically allocate resources or information campaigns. We explore the task of detecting vaccine concerns in online discourse using large language models (LLMs) in a zero-shot setting without the need for expensive training datasets. Since real-time monitoring of online sources requires large-scale inference, we explore cost-accuracy trade-offs of different prompting strategies and offer concrete takeaways that may inform choices in system designs for current applications. An analysis of different prompting strategies reveals that classifying the concerns over multiple passes through the LLM, each consisting a boolean question whether the text mentions a vaccine concern or not, works the best. Our results indicate that GPT-4 can strongly outperform crowdworker accuracy when compared to ground truth annotations provided by experts on the recently introduced VaxConcerns dataset, achieving an overall F1 score of 78.7%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.01783v1-abstract-full').style.display = 'none'; document.getElementById('2402.01783v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in AAAI 2024 Health Intelligence workshop</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.19419">arXiv:2305.19419</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.19419">pdf</a>, <a href="https://arxiv.org/format/2305.19419">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Hierarchical Multi-Instance Multi-Label Learning for Detecting Propaganda Techniques </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+A">Anni Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.19419v1-abstract-short" style="display: inline;"> Since the introduction of the SemEval 2020 Task 11 (Martino et al., 2020a), several approaches have been proposed in the literature for classifying propaganda based on the rhetorical techniques used to influence readers. These methods, however, classify one span at a time, ignoring dependencies from the labels of other spans within the same context. In this paper, we approach propaganda technique&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.19419v1-abstract-full').style.display = 'inline'; document.getElementById('2305.19419v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.19419v1-abstract-full" style="display: none;"> Since the introduction of the SemEval 2020 Task 11 (Martino et al., 2020a), several approaches have been proposed in the literature for classifying propaganda based on the rhetorical techniques used to influence readers. These methods, however, classify one span at a time, ignoring dependencies from the labels of other spans within the same context. In this paper, we approach propaganda technique classification as a Multi-Instance Multi-Label (MIML) learning problem (Zhou et al., 2012) and propose a simple RoBERTa-based model (Zhuang et al., 2021) for classifying all spans in an article simultaneously. Further, we note that, due to the annotation process where annotators classified the spans by following a decision tree, there is an inherent hierarchical relationship among the different techniques, which existing approaches ignore. We incorporate these hierarchical label dependencies by adding an auxiliary classifier for each node in the decision tree to the training objective and ensembling the predictions from the original and auxiliary classifiers at test time. Overall, our model leads to an absolute improvement of 2.47% micro-F1 over the model from the shared task winning team in a cross-validation setup and is the best performing non-ensemble model on the shared task leaderboard. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.19419v1-abstract-full').style.display = 'none'; document.getElementById('2305.19419v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.14613">arXiv:2305.14613</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.14613">pdf</a>, <a href="https://arxiv.org/format/2305.14613">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Selectively Answering Ambiguous Questions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cole%2C+J+R">Jeremy R. Cole</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+M+J+Q">Michael J. Q. Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Gillick%2C+D">Daniel Gillick</a>, <a href="/search/cs?searchtype=author&amp;query=Eisenschlos%2C+J+M">Julian Martin Eisenschlos</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Eisenstein%2C+J">Jacob Eisenstein</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.14613v2-abstract-short" style="display: inline;"> Trustworthy language models should abstain from answering questions when they do not know the answer. However, the answer to a question can be unknown for a variety of reasons. Prior research has focused on the case in which the question is clear and the answer is unambiguous but possibly unknown, but the answer to a question can also be unclear due to uncertainty of the questioner&#39;s intent or con&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.14613v2-abstract-full').style.display = 'inline'; document.getElementById('2305.14613v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.14613v2-abstract-full" style="display: none;"> Trustworthy language models should abstain from answering questions when they do not know the answer. However, the answer to a question can be unknown for a variety of reasons. Prior research has focused on the case in which the question is clear and the answer is unambiguous but possibly unknown, but the answer to a question can also be unclear due to uncertainty of the questioner&#39;s intent or context. We investigate question answering from this perspective, focusing on answering a subset of questions with a high degree of accuracy, from a set of questions in which many are inherently ambiguous. In this setting, we find that the most reliable approach to decide when to abstain involves quantifying repetition within sampled model outputs, rather than the model&#39;s likelihood or self-verification as used in prior work. We find this to be the case across different types of uncertainty and model scales,and with or without instruction tuning. Our results suggest that sampling-based confidence scores help calibrate answers to relatively unambiguous questions, with more dramatic improvements on ambiguous questions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.14613v2-abstract-full').style.display = 'none'; document.getElementById('2305.14613v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in EMNLP 2023. 9 pages, 5 figures, 2 pages of appendix</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.12860">arXiv:2303.12860</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2303.12860">pdf</a>, <a href="https://arxiv.org/format/2303.12860">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Salient Span Masking for Temporal Understanding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cole%2C+J+R">Jeremy R. Cole</a>, <a href="/search/cs?searchtype=author&amp;query=Chaudhary%2C+A">Aditi Chaudhary</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Talukdar%2C+P">Partha Talukdar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.12860v1-abstract-short" style="display: inline;"> Salient Span Masking (SSM) has shown itself to be an effective strategy to improve closed-book question answering performance. SSM extends general masked language model pretraining by creating additional unsupervised training sentences that mask a single entity or date span, thus oversampling factual information. Despite the success of this paradigm, the span types and sampling strategies are rela&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.12860v1-abstract-full').style.display = 'inline'; document.getElementById('2303.12860v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.12860v1-abstract-full" style="display: none;"> Salient Span Masking (SSM) has shown itself to be an effective strategy to improve closed-book question answering performance. SSM extends general masked language model pretraining by creating additional unsupervised training sentences that mask a single entity or date span, thus oversampling factual information. Despite the success of this paradigm, the span types and sampling strategies are relatively arbitrary and not widely studied for other tasks. Thus, we investigate SSM from the perspective of temporal tasks, where learning a good representation of various temporal expressions is important. To that end, we introduce Temporal Span Masking (TSM) intermediate training. First, we find that SSM alone improves the downstream performance on three temporal tasks by an avg. +5.8 points. Further, we are able to achieve additional improvements (avg. +0.29 points) by adding the TSM task. These comprise the new best reported results on the targeted tasks. Our analysis suggests that the effectiveness of SSM stems from the sentences chosen in the training data rather than the mask choice: sentences with entities frequently also contain temporal expressions. Nonetheless, the additional targeted spans of TSM can still improve performance, especially in a zero-shot context. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.12860v1-abstract-full').style.display = 'none'; document.getElementById('2303.12860v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages 1 figure, to appear in EACL 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.05077">arXiv:2303.05077</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2303.05077">pdf</a>, <a href="https://arxiv.org/format/2303.05077">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Learning the Legibility of Visual Text Perturbations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Seth%2C+D">Dev Seth</a>, <a href="/search/cs?searchtype=author&amp;query=Stureborg%2C+R">Rickard Stureborg</a>, <a href="/search/cs?searchtype=author&amp;query=Pruthi%2C+D">Danish Pruthi</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.05077v2-abstract-short" style="display: inline;"> Many adversarial attacks in NLP perturb inputs to produce visually similar strings (&#39;ergo&#39; $\rightarrow$ &#39;$蔚$rgo&#39;) which are legible to humans but degrade model performance. Although preserving legibility is a necessary condition for text perturbation, little work has been done to systematically characterize it; instead, legibility is typically loosely enforced via intuitions around the nature and&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.05077v2-abstract-full').style.display = 'inline'; document.getElementById('2303.05077v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.05077v2-abstract-full" style="display: none;"> Many adversarial attacks in NLP perturb inputs to produce visually similar strings (&#39;ergo&#39; $\rightarrow$ &#39;$蔚$rgo&#39;) which are legible to humans but degrade model performance. Although preserving legibility is a necessary condition for text perturbation, little work has been done to systematically characterize it; instead, legibility is typically loosely enforced via intuitions around the nature and extent of perturbations. Particularly, it is unclear to what extent can inputs be perturbed while preserving legibility, or how to quantify the legibility of a perturbed string. In this work, we address this gap by learning models that predict the legibility of a perturbed string, and rank candidate perturbations based on their legibility. To do so, we collect and release LEGIT, a human-annotated dataset comprising the legibility of visually perturbed text. Using this dataset, we build both text- and vision-based models which achieve up to $0.91$ F1 score in predicting whether an input is legible, and an accuracy of $0.86$ in predicting which of two given perturbations is more legible. Additionally, we discover that legible perturbations from the LEGIT dataset are more effective at lowering the performance of NLP models than best-known attack strategies, suggesting that current models may be vulnerable to a broad range of perturbations beyond what is captured by existing visual attacks. Data, code, and models are available at https://github.com/dvsth/learning-legibility-2023. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.05077v2-abstract-full').style.display = 'none'; document.getElementById('2303.05077v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 7 figures. Accepted at EACL 2023 (main, long)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.00242">arXiv:2303.00242</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2303.00242">pdf</a>, <a href="https://arxiv.org/format/2303.00242">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> DIFFQG: Generating Questions to Summarize Factual Changes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cole%2C+J+R">Jeremy R. Cole</a>, <a href="/search/cs?searchtype=author&amp;query=Jain%2C+P">Palak Jain</a>, <a href="/search/cs?searchtype=author&amp;query=Eisenschlos%2C+J+M">Julian Martin Eisenschlos</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+M+J+Q">Michael J. Q. Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+E">Eunsol Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.00242v1-abstract-short" style="display: inline;"> Identifying the difference between two versions of the same article is useful to update knowledge bases and to understand how articles evolve. Paired texts occur naturally in diverse situations: reporters write similar news stories and maintainers of authoritative websites must keep their information up to date. We propose representing factual changes between paired documents as question-answer pa&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.00242v1-abstract-full').style.display = 'inline'; document.getElementById('2303.00242v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.00242v1-abstract-full" style="display: none;"> Identifying the difference between two versions of the same article is useful to update knowledge bases and to understand how articles evolve. Paired texts occur naturally in diverse situations: reporters write similar news stories and maintainers of authoritative websites must keep their information up to date. We propose representing factual changes between paired documents as question-answer pairs, where the answer to the same question differs between two versions. We find that question-answer pairs can flexibly and concisely capture the updated contents. Provided with paired documents, annotators identify questions that are answered by one passage but answered differently or cannot be answered by the other. We release DIFFQG which consists of 759 QA pairs and 1153 examples of paired passages with no factual change. These questions are intended to be both unambiguous and information-seeking and involve complex edits, pushing beyond the capabilities of current question generation and factual change detection systems. Our dataset summarizes the changes between two versions of the document as questions and answers, studying automatic update summarization in a novel way. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.00242v1-abstract-full').style.display = 'none'; document.getElementById('2303.00242v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages. Accepted at EACL 2023 (main, long)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.02990">arXiv:2302.02990</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2302.02990">pdf</a>, <a href="https://arxiv.org/format/2302.02990">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3544548.3581431">10.1145/3544548.3581431 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Interface Design for Crowdsourcing Hierarchical Multi-Label Text Annotations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Stureborg%2C+R">Rickard Stureborg</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jun Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.02990v2-abstract-short" style="display: inline;"> Human data labeling is an important and expensive task at the heart of supervised learning systems. Hierarchies help humans understand and organize concepts. We ask whether and how concept hierarchies can inform the design of annotation interfaces to improve labeling quality and efficiency. We study this question through annotation of vaccine misinformation, where the labeling task is difficult an&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.02990v2-abstract-full').style.display = 'inline'; document.getElementById('2302.02990v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.02990v2-abstract-full" style="display: none;"> Human data labeling is an important and expensive task at the heart of supervised learning systems. Hierarchies help humans understand and organize concepts. We ask whether and how concept hierarchies can inform the design of annotation interfaces to improve labeling quality and efficiency. We study this question through annotation of vaccine misinformation, where the labeling task is difficult and highly subjective. We investigate 6 user interface designs for crowdsourcing hierarchical labels by collecting over 18,000 individual annotations. Under a fixed budget, integrating hierarchies into the design improves crowdsource workers&#39; F1 scores. We attribute this to (1) Grouping similar concepts, improving F1 scores by +0.16 over random groupings, (2) Strong relative performance on high-difficulty examples (relative F1 score difference of +0.40), and (3) Filtering out obvious negatives, increasing precision by +0.07. Ultimately, labeling schemes integrating the hierarchy outperform those that do not - achieving mean F1 of 0.70. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.02990v2-abstract-full').style.display = 'none'; document.getElementById('2302.02990v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in CHI-2023</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> H.5 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.06869">arXiv:2209.06869</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2209.06869">pdf</a>, <a href="https://arxiv.org/format/2209.06869">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> On the State of the Art in Authorship Attribution and Authorship Verification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Tyo%2C+J">Jacob Tyo</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Lipton%2C+Z+C">Zachary C. Lipton</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.06869v2-abstract-short" style="display: inline;"> Despite decades of research on authorship attribution (AA) and authorship verification (AV), inconsistent dataset splits/filtering and mismatched evaluation methods make it difficult to assess the state of the art. In this paper, we present a survey of the fields, resolve points of confusion, introduce Valla that standardizes and benchmarks AA/AV datasets and metrics, provide a large-scale empiric&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.06869v2-abstract-full').style.display = 'inline'; document.getElementById('2209.06869v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.06869v2-abstract-full" style="display: none;"> Despite decades of research on authorship attribution (AA) and authorship verification (AV), inconsistent dataset splits/filtering and mismatched evaluation methods make it difficult to assess the state of the art. In this paper, we present a survey of the fields, resolve points of confusion, introduce Valla that standardizes and benchmarks AA/AV datasets and metrics, provide a large-scale empirical evaluation, and provide apples-to-apples comparisons between existing methods. We evaluate eight promising methods on fifteen datasets (including distribution-shifted challenge sets) and introduce a new large-scale dataset based on texts archived by Project Gutenberg. Surprisingly, we find that a traditional Ngram-based model performs best on 5 (of 7) AA tasks, achieving an average macro-accuracy of $76.50\%$ (compared to $66.71\%$ for a BERT-based model). However, on the two AA datasets with the greatest number of words per author, as well as on the AV datasets, BERT-based models perform best. While AV methods are easily applied to AA, they are seldom included as baselines in AA papers. We show that through the application of hard-negative mining, AV methods are competitive alternatives to AA methods. Valla and all experiment code can be found here: https://github.com/JacobTyo/Valla <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.06869v2-abstract-full').style.display = 'none'; document.getElementById('2209.06869v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.07288">arXiv:2204.07288</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2204.07288">pdf</a>, <a href="https://arxiv.org/format/2204.07288">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Characterizing the Efficiency vs. Accuracy Trade-off for Long-Context NLP Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ang%2C+P">Phyllis Ang</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Wills%2C+L+W">Lisa Wu Wills</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.07288v1-abstract-short" style="display: inline;"> With many real-world applications of Natural Language Processing (NLP) comprising of long texts, there has been a rise in NLP benchmarks that measure the accuracy of models that can handle longer input sequences. However, these benchmarks do not consider the trade-offs between accuracy, speed, and power consumption as input sizes or model sizes are varied. In this work, we perform a systematic stu&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.07288v1-abstract-full').style.display = 'inline'; document.getElementById('2204.07288v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.07288v1-abstract-full" style="display: none;"> With many real-world applications of Natural Language Processing (NLP) comprising of long texts, there has been a rise in NLP benchmarks that measure the accuracy of models that can handle longer input sequences. However, these benchmarks do not consider the trade-offs between accuracy, speed, and power consumption as input sizes or model sizes are varied. In this work, we perform a systematic study of this accuracy vs. efficiency trade-off on two widely used long-sequence models - Longformer-Encoder-Decoder (LED) and Big Bird - during fine-tuning and inference on four datasets from the SCROLLS benchmark. To study how this trade-off differs across hyperparameter settings, we compare the models across four sequence lengths (1024, 2048, 3072, 4096) and two model sizes (base and large) under a fixed resource budget. We find that LED consistently achieves better accuracy at lower energy costs than Big Bird. For summarization, we find that increasing model size is more energy efficient than increasing sequence length for higher accuracy. However, this comes at the cost of a large drop in inference speed. For question answering, we find that smaller models are both more efficient and more accurate due to the larger training batch sizes possible under a fixed resource budget. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.07288v1-abstract-full').style.display = 'none'; document.getElementById('2204.07288v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at NLP Power! Workshop on Efficient Benchmarking in NLP at ACL2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.06092">arXiv:2204.06092</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2204.06092">pdf</a>, <a href="https://arxiv.org/format/2204.06092">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> ASQA: Factoid Questions Meet Long-Form Answers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Stelmakh%2C+I">Ivan Stelmakh</a>, <a href="/search/cs?searchtype=author&amp;query=Luan%2C+Y">Yi Luan</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Chang%2C+M">Ming-Wei Chang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.06092v2-abstract-short" style="display: inline;"> An abundance of datasets and availability of reliable evaluation metrics have resulted in strong progress in factoid question answering (QA). This progress, however, does not easily transfer to the task of long-form QA, where the goal is to answer questions that require in-depth explanations. The hurdles include (i) a lack of high-quality data, and (ii) the absence of a well-defined notion of the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.06092v2-abstract-full').style.display = 'inline'; document.getElementById('2204.06092v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.06092v2-abstract-full" style="display: none;"> An abundance of datasets and availability of reliable evaluation metrics have resulted in strong progress in factoid question answering (QA). This progress, however, does not easily transfer to the task of long-form QA, where the goal is to answer questions that require in-depth explanations. The hurdles include (i) a lack of high-quality data, and (ii) the absence of a well-defined notion of the answer&#39;s quality. In this work, we address these problems by (i) releasing a novel dataset and a task that we call ASQA (Answer Summaries for Questions which are Ambiguous); and (ii) proposing a reliable metric for measuring performance on ASQA. Our task focuses on factoid questions that are ambiguous, that is, have different correct answers depending on interpretation. Answers to ambiguous questions should synthesize factual information from multiple sources into a long-form summary that resolves the ambiguity. In contrast to existing long-form QA tasks (such as ELI5), ASQA admits a clear notion of correctness: a user faced with a good summary should be able to answer different interpretations of the original ambiguous question. We use this notion of correctness to define an automated metric of performance for ASQA. Our analysis demonstrates an agreement between this metric and human judgments, and reveals a considerable gap between human performance and strong baselines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.06092v2-abstract-full').style.display = 'none'; document.getElementById('2204.06092v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">A minor bug in computing the ROUGE score was fixed. The fix **did not** result in any changes in observations and conclusions</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.15110">arXiv:2106.15110</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2106.15110">pdf</a>, <a href="https://arxiv.org/format/2106.15110">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1162/tacl_a_00459">10.1162/tacl_a_00459 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Time-Aware Language Models as Temporal Knowledge Bases </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Cole%2C+J+R">Jeremy R. Cole</a>, <a href="/search/cs?searchtype=author&amp;query=Eisenschlos%2C+J+M">Julian Martin Eisenschlos</a>, <a href="/search/cs?searchtype=author&amp;query=Gillick%2C+D">Daniel Gillick</a>, <a href="/search/cs?searchtype=author&amp;query=Eisenstein%2C+J">Jacob Eisenstein</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.15110v2-abstract-short" style="display: inline;"> Many facts come with an expiration date, from the name of the President to the basketball team Lebron James plays for. But language models (LMs) are trained on snapshots of data collected at a specific moment in time, and this can limit their utility, especially in the closed-book setting where the pretraining corpus must contain the facts the model should memorize. We introduce a diagnostic datas&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.15110v2-abstract-full').style.display = 'inline'; document.getElementById('2106.15110v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.15110v2-abstract-full" style="display: none;"> Many facts come with an expiration date, from the name of the President to the basketball team Lebron James plays for. But language models (LMs) are trained on snapshots of data collected at a specific moment in time, and this can limit their utility, especially in the closed-book setting where the pretraining corpus must contain the facts the model should memorize. We introduce a diagnostic dataset aimed at probing LMs for factual knowledge that changes over time and highlight problems with LMs at either end of the spectrum -- those trained on specific slices of temporal data, as well as those trained on a wide range of temporal data. To mitigate these problems, we propose a simple technique for jointly modeling text with its timestamp. This improves memorization of seen facts from the training time period, as well as calibration on predictions about unseen facts from future time periods. We also show that models trained with temporal context can be efficiently &#34;refreshed&#34; as new data arrives, without the need for retraining from scratch. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.15110v2-abstract-full').style.display = 'none'; document.getElementById('2106.15110v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Version accepted to TACL</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Transactions of the Association for Computational Linguistics 2022; 10 257-273 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2104.04725">arXiv:2104.04725</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2104.04725">pdf</a>, <a href="https://arxiv.org/format/2104.04725">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Fool Me Twice: Entailment from Wikipedia Gamification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Eisenschlos%2C+J+M">Julian Martin Eisenschlos</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Bulian%2C+J">Jannis Bulian</a>, <a href="/search/cs?searchtype=author&amp;query=B%C3%B6rschinger%2C+B">Benjamin B枚rschinger</a>, <a href="/search/cs?searchtype=author&amp;query=Boyd-Graber%2C+J">Jordan Boyd-Graber</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2104.04725v1-abstract-short" style="display: inline;"> We release FoolMeTwice (FM2 for short), a large dataset of challenging entailment pairs collected through a fun multi-player game. Gamification encourages adversarial examples, drastically lowering the number of examples that can be solved using &#34;shortcuts&#34; compared to other popular entailment datasets. Players are presented with two tasks. The first task asks the player to write a plausible claim&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.04725v1-abstract-full').style.display = 'inline'; document.getElementById('2104.04725v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2104.04725v1-abstract-full" style="display: none;"> We release FoolMeTwice (FM2 for short), a large dataset of challenging entailment pairs collected through a fun multi-player game. Gamification encourages adversarial examples, drastically lowering the number of examples that can be solved using &#34;shortcuts&#34; compared to other popular entailment datasets. Players are presented with two tasks. The first task asks the player to write a plausible claim based on the evidence from a Wikipedia page. The second one shows two plausible claims written by other players, one of which is false, and the goal is to identify it before the time runs out. Players &#34;pay&#34; to see clues retrieved from the evidence pool: the more evidence the player needs, the harder the claim. Game-play between motivated players leads to diverse strategies for crafting claims, such as temporal inference and diverting to unrelated evidence, and results in higher quality data for the entailment and evidence retrieval tasks. We open source the dataset and the game code. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.04725v1-abstract-full').style.display = 'none'; document.getElementById('2104.04725v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in NAACL 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2102.07043">arXiv:2102.07043</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2102.07043">pdf</a>, <a href="https://arxiv.org/format/2102.07043">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Reasoning Over Virtual Knowledge Bases With Open Predicate Relations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Sun%2C+H">Haitian Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Verga%2C+P">Pat Verga</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Salakhutdinov%2C+R">Ruslan Salakhutdinov</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2102.07043v2-abstract-short" style="display: inline;"> We present the Open Predicate Query Language (OPQL); a method for constructing a virtual KB (VKB) trained entirely from text. Large Knowledge Bases (KBs) are indispensable for a wide-range of industry applications such as question answering and recommendation. Typically, KBs encode world knowledge in a structured, readily accessible form derived from laborious human annotation efforts. Unfortunate&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.07043v2-abstract-full').style.display = 'inline'; document.getElementById('2102.07043v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2102.07043v2-abstract-full" style="display: none;"> We present the Open Predicate Query Language (OPQL); a method for constructing a virtual KB (VKB) trained entirely from text. Large Knowledge Bases (KBs) are indispensable for a wide-range of industry applications such as question answering and recommendation. Typically, KBs encode world knowledge in a structured, readily accessible form derived from laborious human annotation efforts. Unfortunately, while they are extremely high precision, KBs are inevitably highly incomplete and automated methods for enriching them are far too inaccurate. Instead, OPQL constructs a VKB by encoding and indexing a set of relation mentions in a way that naturally enables reasoning and can be trained without any structured supervision. We demonstrate that OPQL outperforms prior VKB methods on two different KB reasoning tasks and, additionally, can be used as an external memory integrated into a language model (OPQL-LM) leading to improvements on two open-domain question answering tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.07043v2-abstract-full').style.display = 'none'; document.getElementById('2102.07043v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 February, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at the 38th International Conference on Machine Learning, PMLR 139, 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2012.00893">arXiv:2012.00893</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2012.00893">pdf</a>, <a href="https://arxiv.org/format/2012.00893">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Evaluating Explanations: How much do explanations from the teacher aid students? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Pruthi%2C+D">Danish Pruthi</a>, <a href="/search/cs?searchtype=author&amp;query=Bansal%2C+R">Rachit Bansal</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Soares%2C+L+B">Livio Baldini Soares</a>, <a href="/search/cs?searchtype=author&amp;query=Collins%2C+M">Michael Collins</a>, <a href="/search/cs?searchtype=author&amp;query=Lipton%2C+Z+C">Zachary C. Lipton</a>, <a href="/search/cs?searchtype=author&amp;query=Neubig%2C+G">Graham Neubig</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2012.00893v2-abstract-short" style="display: inline;"> While many methods purport to explain predictions by highlighting salient features, what aims these explanations serve and how they ought to be evaluated often go unstated. In this work, we introduce a framework to quantify the value of explanations via the accuracy gains that they confer on a student model trained to simulate a teacher model. Crucially, the explanations are available to the stude&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2012.00893v2-abstract-full').style.display = 'inline'; document.getElementById('2012.00893v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2012.00893v2-abstract-full" style="display: none;"> While many methods purport to explain predictions by highlighting salient features, what aims these explanations serve and how they ought to be evaluated often go unstated. In this work, we introduce a framework to quantify the value of explanations via the accuracy gains that they confer on a student model trained to simulate a teacher model. Crucially, the explanations are available to the student during training, but are not available at test time. Compared to prior proposals, our approach is less easily gamed, enabling principled, automatic, model-agnostic evaluation of attributions. Using our framework, we compare numerous attribution methods for text classification and question answering, and observe quantitative differences that are consistent (to a moderate to high degree) across different student model architectures and learning strategies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2012.00893v2-abstract-full').style.display = 'none'; document.getElementById('2012.00893v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 December, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">TACL 2021 (pre-MIT Press publication version)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2011.01459">arXiv:2011.01459</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2011.01459">pdf</a>, <a href="https://arxiv.org/format/2011.01459">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Weakly- and Semi-supervised Evidence Extraction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Pruthi%2C+D">Danish Pruthi</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Neubig%2C+G">Graham Neubig</a>, <a href="/search/cs?searchtype=author&amp;query=Lipton%2C+Z+C">Zachary C. Lipton</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2011.01459v1-abstract-short" style="display: inline;"> For many prediction tasks, stakeholders desire not only predictions but also supporting evidence that a human can use to verify its correctness. However, in practice, additional annotations marking supporting evidence may only be available for a minority of training examples (if available at all). In this paper, we propose new methods to combine few evidence annotations (strong semi-supervision) w&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2011.01459v1-abstract-full').style.display = 'inline'; document.getElementById('2011.01459v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2011.01459v1-abstract-full" style="display: none;"> For many prediction tasks, stakeholders desire not only predictions but also supporting evidence that a human can use to verify its correctness. However, in practice, additional annotations marking supporting evidence may only be available for a minority of training examples (if available at all). In this paper, we propose new methods to combine few evidence annotations (strong semi-supervision) with abundant document-level labels (weak supervision) for the task of evidence extraction. Evaluating on two classification tasks that feature evidence annotations, we find that our methods outperform baselines adapted from the interpretability literature to our task. Our approach yields substantial gains with as few as hundred evidence annotations. Code and datasets to reproduce our work are available at https://github.com/danishpruthi/evidence-extraction. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2011.01459v1-abstract-full').style.display = 'none'; document.getElementById('2011.01459v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 November, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to the Findings of EMNLP 2020, to be presented at BlackBoxNLP</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.14439">arXiv:2010.14439</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2010.14439">pdf</a>, <a href="https://arxiv.org/format/2010.14439">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Differentiable Open-Ended Commonsense Reasoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lin%2C+B+Y">Bill Yuchen Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+H">Haitian Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Zaheer%2C+M">Manzil Zaheer</a>, <a href="/search/cs?searchtype=author&amp;query=Ren%2C+X">Xiang Ren</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.14439v2-abstract-short" style="display: inline;"> Current commonsense reasoning research focuses on developing models that use commonsense knowledge to answer multiple-choice questions. However, systems designed to answer multiple-choice questions may not be useful in applications that do not provide a small list of candidate answers to choose from. As a step towards making commonsense reasoning research more realistic, we propose to study open-e&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.14439v2-abstract-full').style.display = 'inline'; document.getElementById('2010.14439v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.14439v2-abstract-full" style="display: none;"> Current commonsense reasoning research focuses on developing models that use commonsense knowledge to answer multiple-choice questions. However, systems designed to answer multiple-choice questions may not be useful in applications that do not provide a small list of candidate answers to choose from. As a step towards making commonsense reasoning research more realistic, we propose to study open-ended commonsense reasoning (OpenCSR) -- the task of answering a commonsense question without any pre-defined choices -- using as a resource only a corpus of commonsense facts written in natural language. OpenCSR is challenging due to a large decision space, and because many questions require implicit multi-hop reasoning. As an approach to OpenCSR, we propose DrFact, an efficient Differentiable model for multi-hop Reasoning over knowledge Facts. To evaluate OpenCSR methods, we adapt several popular commonsense reasoning benchmarks, and collect multiple new answers for each test question via crowd-sourcing. Experiments show that DrFact outperforms strong baseline methods by a large margin. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.14439v2-abstract-full').style.display = 'none'; document.getElementById('2010.14439v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to NAACL 2021. Project website: https://open-csr.github.io</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2004.14373">arXiv:2004.14373</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2004.14373">pdf</a>, <a href="https://arxiv.org/format/2004.14373">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> ToTTo: A Controlled Table-To-Text Generation Dataset </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Parikh%2C+A+P">Ankur P. Parikh</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+X">Xuezhi Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Gehrmann%2C+S">Sebastian Gehrmann</a>, <a href="/search/cs?searchtype=author&amp;query=Faruqui%2C+M">Manaal Faruqui</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+D">Diyi Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Das%2C+D">Dipanjan Das</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2004.14373v3-abstract-short" style="display: inline;"> We present ToTTo, an open-domain English table-to-text dataset with over 120,000 training examples that proposes a controlled generation task: given a Wikipedia table and a set of highlighted table cells, produce a one-sentence description. To obtain generated targets that are natural but also faithful to the source table, we introduce a dataset construction process where annotators directly revis&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.14373v3-abstract-full').style.display = 'inline'; document.getElementById('2004.14373v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2004.14373v3-abstract-full" style="display: none;"> We present ToTTo, an open-domain English table-to-text dataset with over 120,000 training examples that proposes a controlled generation task: given a Wikipedia table and a set of highlighted table cells, produce a one-sentence description. To obtain generated targets that are natural but also faithful to the source table, we introduce a dataset construction process where annotators directly revise existing candidate sentences from Wikipedia. We present systematic analyses of our dataset and annotation process as well as results achieved by several state-of-the-art baselines. While usually fluent, existing methods often hallucinate phrases that are not supported by the table, suggesting that this dataset can serve as a useful research benchmark for high-precision conditional text generation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.14373v3-abstract-full').style.display = 'none'; document.getElementById('2004.14373v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 April, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to EMNLP 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2002.10640">arXiv:2002.10640</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2002.10640">pdf</a>, <a href="https://arxiv.org/format/2002.10640">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Differentiable Reasoning over a Virtual Knowledge Base </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Zaheer%2C+M">Manzil Zaheer</a>, <a href="/search/cs?searchtype=author&amp;query=Balachandran%2C+V">Vidhisha Balachandran</a>, <a href="/search/cs?searchtype=author&amp;query=Neubig%2C+G">Graham Neubig</a>, <a href="/search/cs?searchtype=author&amp;query=Salakhutdinov%2C+R">Ruslan Salakhutdinov</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2002.10640v1-abstract-short" style="display: inline;"> We consider the task of answering complex multi-hop questions using a corpus as a virtual knowledge base (KB). In particular, we describe a neural module, DrKIT, that traverses textual data like a KB, softly following paths of relations between mentions of entities in the corpus. At each step the module uses a combination of sparse-matrix TFIDF indices and a maximum inner product search (MIPS) on&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.10640v1-abstract-full').style.display = 'inline'; document.getElementById('2002.10640v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2002.10640v1-abstract-full" style="display: none;"> We consider the task of answering complex multi-hop questions using a corpus as a virtual knowledge base (KB). In particular, we describe a neural module, DrKIT, that traverses textual data like a KB, softly following paths of relations between mentions of entities in the corpus. At each step the module uses a combination of sparse-matrix TFIDF indices and a maximum inner product search (MIPS) on a special index of contextual representations of the mentions. This module is differentiable, so the full system can be trained end-to-end using gradient based methods, starting from natural language inputs. We also describe a pretraining scheme for the contextual representation encoder by generating hard negative examples using existing knowledge bases. We show that DrKIT improves accuracy by 9 points on 3-hop questions in the MetaQA dataset, cutting the gap between text-based and KB-based state-of-the-art by 70%. On HotpotQA, DrKIT leads to a 10% improvement over a BERT-based re-ranking approach to retrieving the relevant passages required to answer a question. DrKIT is also very efficient, processing 10-100x more queries per second than existing multi-hop systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.10640v1-abstract-full').style.display = 'none'; document.getElementById('2002.10640v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1909.07913">arXiv:1909.07913</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1909.07913">pdf</a>, <a href="https://arxiv.org/format/1909.07913">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Learning to Deceive with Attention-Based Explanations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Pruthi%2C+D">Danish Pruthi</a>, <a href="/search/cs?searchtype=author&amp;query=Gupta%2C+M">Mansi Gupta</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Neubig%2C+G">Graham Neubig</a>, <a href="/search/cs?searchtype=author&amp;query=Lipton%2C+Z+C">Zachary C. Lipton</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1909.07913v2-abstract-short" style="display: inline;"> Attention mechanisms are ubiquitous components in neural architectures applied to natural language processing. In addition to yielding gains in predictive accuracy, attention weights are often claimed to confer interpretability, purportedly useful both for providing insights to practitioners and for explaining why a model makes its decisions to stakeholders. We call the latter use of attention mec&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1909.07913v2-abstract-full').style.display = 'inline'; document.getElementById('1909.07913v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1909.07913v2-abstract-full" style="display: none;"> Attention mechanisms are ubiquitous components in neural architectures applied to natural language processing. In addition to yielding gains in predictive accuracy, attention weights are often claimed to confer interpretability, purportedly useful both for providing insights to practitioners and for explaining why a model makes its decisions to stakeholders. We call the latter use of attention mechanisms into question by demonstrating a simple method for training models to produce deceptive attention masks. Our method diminishes the total weight assigned to designated impermissible tokens, even when the models can be shown to nevertheless rely on these features to drive predictions. Across multiple models and tasks, our approach manipulates attention weights while paying surprisingly little cost in accuracy. Through a human study, we show that our manipulated attention-based explanations deceive people into thinking that predictions from a model biased against gender minorities do not rely on the gender. Consequently, our results cast doubt on attention&#39;s reliability as a tool for auditing algorithms in the context of fairness and accountability. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1909.07913v2-abstract-full').style.display = 'none'; document.getElementById('1909.07913v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 April, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 September, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ACL 2020 as a long paper. Updated version</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1909.06146">arXiv:1909.06146</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1909.06146">pdf</a>, <a href="https://arxiv.org/format/1909.06146">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> PubMedQA: A Dataset for Biomedical Research Question Answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jin%2C+Q">Qiao Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Zhengping Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+X">Xinghua Lu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1909.06146v1-abstract-short" style="display: inline;"> We introduce PubMedQA, a novel biomedical question answering (QA) dataset collected from PubMed abstracts. The task of PubMedQA is to answer research questions with yes/no/maybe (e.g.: Do preoperative statins reduce atrial fibrillation after coronary artery bypass grafting?) using the corresponding abstracts. PubMedQA has 1k expert-annotated, 61.2k unlabeled and 211.3k artificially generated QA in&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1909.06146v1-abstract-full').style.display = 'inline'; document.getElementById('1909.06146v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1909.06146v1-abstract-full" style="display: none;"> We introduce PubMedQA, a novel biomedical question answering (QA) dataset collected from PubMed abstracts. The task of PubMedQA is to answer research questions with yes/no/maybe (e.g.: Do preoperative statins reduce atrial fibrillation after coronary artery bypass grafting?) using the corresponding abstracts. PubMedQA has 1k expert-annotated, 61.2k unlabeled and 211.3k artificially generated QA instances. Each PubMedQA instance is composed of (1) a question which is either an existing research article title or derived from one, (2) a context which is the corresponding abstract without its conclusion, (3) a long answer, which is the conclusion of the abstract and, presumably, answers the research question, and (4) a yes/no/maybe answer which summarizes the conclusion. PubMedQA is the first QA dataset where reasoning over biomedical research texts, especially their quantitative contents, is required to answer the questions. Our best performing model, multi-phase fine-tuning of BioBERT with long answer bag-of-word statistics as additional supervision, achieves 68.1% accuracy, compared to single human performance of 78.0% accuracy and majority-baseline of 55.2% accuracy, leaving much room for improvement. PubMedQA is publicly available at https://pubmedqa.github.io. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1909.06146v1-abstract-full').style.display = 'none'; document.getElementById('1909.06146v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 September, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP 2019</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1906.01081">arXiv:1906.01081</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1906.01081">pdf</a>, <a href="https://arxiv.org/format/1906.01081">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Handling Divergent Reference Texts when Evaluating Table-to-Text Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Faruqui%2C+M">Manaal Faruqui</a>, <a href="/search/cs?searchtype=author&amp;query=Parikh%2C+A">Ankur Parikh</a>, <a href="/search/cs?searchtype=author&amp;query=Chang%2C+M">Ming-Wei Chang</a>, <a href="/search/cs?searchtype=author&amp;query=Das%2C+D">Dipanjan Das</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1906.01081v1-abstract-short" style="display: inline;"> Automatically constructed datasets for generating text from semi-structured data (tables), such as WikiBio, often contain reference texts that diverge from the information in the corresponding semi-structured data. We show that metrics which rely solely on the reference texts, such as BLEU and ROUGE, show poor correlation with human judgments when those references diverge. We propose a new metric,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1906.01081v1-abstract-full').style.display = 'inline'; document.getElementById('1906.01081v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1906.01081v1-abstract-full" style="display: none;"> Automatically constructed datasets for generating text from semi-structured data (tables), such as WikiBio, often contain reference texts that diverge from the information in the corresponding semi-structured data. We show that metrics which rely solely on the reference texts, such as BLEU and ROUGE, show poor correlation with human judgments when those references diverge. We propose a new metric, PARENT, which aligns n-grams from the reference and generated texts to the semi-structured data before computing their precision and recall. Through a large scale human evaluation study of table-to-text models for WikiBio, we show that PARENT correlates with human judgments better than existing text generation metrics. We also adapt and evaluate the information extraction based evaluation proposed by Wiseman et al (2017), and show that PARENT has comparable correlation to it, while being easier to use. We show that PARENT is also applicable when the reference texts are elicited from humans using the data from the WebNLG challenge. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1906.01081v1-abstract-full').style.display = 'none'; document.getElementById('1906.01081v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 June, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear at ACL 2019</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1905.11268">arXiv:1905.11268</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1905.11268">pdf</a>, <a href="https://arxiv.org/format/1905.11268">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Combating Adversarial Misspellings with Robust Word Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Pruthi%2C+D">Danish Pruthi</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Lipton%2C+Z+C">Zachary C. Lipton</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1905.11268v2-abstract-short" style="display: inline;"> To combat adversarial spelling mistakes, we propose placing a word recognition model in front of the downstream classifier. Our word recognition models build upon the RNN semi-character architecture, introducing several new backoff strategies for handling rare and unseen words. Trained to recognize words corrupted by random adds, drops, swaps, and keyboard mistakes, our method achieves 32% relativ&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1905.11268v2-abstract-full').style.display = 'inline'; document.getElementById('1905.11268v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1905.11268v2-abstract-full" style="display: none;"> To combat adversarial spelling mistakes, we propose placing a word recognition model in front of the downstream classifier. Our word recognition models build upon the RNN semi-character architecture, introducing several new backoff strategies for handling rare and unseen words. Trained to recognize words corrupted by random adds, drops, swaps, and keyboard mistakes, our method achieves 32% relative (and 3.3% absolute) error reduction over the vanilla semi-character model. Notably, our pipeline confers robustness on the downstream classifier, outperforming both adversarial training and off-the-shelf spell checkers. Against a BERT model fine-tuned for sentiment analysis, a single adversarially-chosen character attack lowers accuracy from 90.3% to 45.8%. Our defense restores accuracy to 75%. Surprisingly, better word recognition does not always entail greater robustness. Our analysis reveals that robustness also depends upon a quantity that we denote the sensitivity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1905.11268v2-abstract-full').style.display = 'none'; document.getElementById('1905.11268v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 August, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 May, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ACL 2019, long paper</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1904.04428">arXiv:1904.04428</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1904.04428">pdf</a>, <a href="https://arxiv.org/format/1904.04428">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Text Generation with Exemplar-based Adaptive Decoding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Peng%2C+H">Hao Peng</a>, <a href="/search/cs?searchtype=author&amp;query=Parikh%2C+A+P">Ankur P. Parikh</a>, <a href="/search/cs?searchtype=author&amp;query=Faruqui%2C+M">Manaal Faruqui</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Das%2C+D">Dipanjan Das</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1904.04428v2-abstract-short" style="display: inline;"> We propose a novel conditioned text generation model. It draws inspiration from traditional template-based text generation techniques, where the source provides the content (i.e., what to say), and the template influences how to say it. Building on the successful encoder-decoder paradigm, it first encodes the content representation from the given input text; to produce the output, it retrieves exe&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1904.04428v2-abstract-full').style.display = 'inline'; document.getElementById('1904.04428v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1904.04428v2-abstract-full" style="display: none;"> We propose a novel conditioned text generation model. It draws inspiration from traditional template-based text generation techniques, where the source provides the content (i.e., what to say), and the template influences how to say it. Building on the successful encoder-decoder paradigm, it first encodes the content representation from the given input text; to produce the output, it retrieves exemplar text from the training data as &#34;soft templates,&#34; which are then used to construct an exemplar-specific decoder. We evaluate the proposed model on abstractive text summarization and data-to-text generation. Empirical results show that this model achieves strong performance and outperforms comparable baselines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1904.04428v2-abstract-full').style.display = 'none'; document.getElementById('1904.04428v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 April, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 April, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NAACL 2019</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1904.02181">arXiv:1904.02181</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1904.02181">pdf</a>, <a href="https://arxiv.org/format/1904.02181">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Probing Biomedical Embeddings from Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jin%2C+Q">Qiao Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+X">Xinghua Lu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1904.02181v1-abstract-short" style="display: inline;"> Contextualized word embeddings derived from pre-trained language models (LMs) show significant improvements on downstream NLP tasks. Pre-training on domain-specific corpora, such as biomedical articles, further improves their performance. In this paper, we conduct probing experiments to determine what additional information is carried intrinsically by the in-domain trained contextualized embedding&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1904.02181v1-abstract-full').style.display = 'inline'; document.getElementById('1904.02181v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1904.02181v1-abstract-full" style="display: none;"> Contextualized word embeddings derived from pre-trained language models (LMs) show significant improvements on downstream NLP tasks. Pre-training on domain-specific corpora, such as biomedical articles, further improves their performance. In this paper, we conduct probing experiments to determine what additional information is carried intrinsically by the in-domain trained contextualized embeddings. For this we use the pre-trained LMs as fixed feature extractors and restrict the downstream task models to not have additional sequence modeling layers. We compare BERT, ELMo, BioBERT and BioELMo, a biomedical version of ELMo trained on 10M PubMed abstracts. Surprisingly, while fine-tuned BioBERT is better than BioELMo in biomedical NER and NLI tasks, as a fixed feature extractor BioELMo outperforms BioBERT in our probing tasks. We use visualization and nearest neighbor analysis to show that better encoding of entity-type and relational information leads to this superiority. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1904.02181v1-abstract-full').style.display = 'none'; document.getElementById('1904.02181v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 April, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NAACL-HLT 2019 Workshop on Evaluating Vector Space Representations for NLP (RepEval)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1809.00782">arXiv:1809.00782</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1809.00782">pdf</a>, <a href="https://arxiv.org/format/1809.00782">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Open Domain Question Answering Using Early Fusion of Knowledge Bases and Text </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Sun%2C+H">Haitian Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Zaheer%2C+M">Manzil Zaheer</a>, <a href="/search/cs?searchtype=author&amp;query=Mazaitis%2C+K">Kathryn Mazaitis</a>, <a href="/search/cs?searchtype=author&amp;query=Salakhutdinov%2C+R">Ruslan Salakhutdinov</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1809.00782v1-abstract-short" style="display: inline;"> Open Domain Question Answering (QA) is evolving from complex pipelined systems to end-to-end deep neural networks. Specialized neural models have been developed for extracting answers from either text alone or Knowledge Bases (KBs) alone. In this paper we look at a more practical setting, namely QA over the combination of a KB and entity-linked text, which is appropriate when an incomplete KB is a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1809.00782v1-abstract-full').style.display = 'inline'; document.getElementById('1809.00782v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1809.00782v1-abstract-full" style="display: none;"> Open Domain Question Answering (QA) is evolving from complex pipelined systems to end-to-end deep neural networks. Specialized neural models have been developed for extracting answers from either text alone or Knowledge Bases (KBs) alone. In this paper we look at a more practical setting, namely QA over the combination of a KB and entity-linked text, which is appropriate when an incomplete KB is available with a large text corpus. Building on recent advances in graph representation learning we propose a novel model, GRAFT-Net, for extracting answers from a question-specific subgraph containing text and KB entities and relations. We construct a suite of benchmark tasks for this problem, varying the difficulty of questions, the amount of training data, and KB completeness. We show that GRAFT-Net is competitive with the state-of-the-art when tested using either KBs or text alone, and vastly outperforms existing methods in the combined setting. Source code is available at https://github.com/OceanskySun/GraftNet . <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1809.00782v1-abstract-full').style.display = 'none'; document.getElementById('1809.00782v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 September, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP 2018</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1806.05662">arXiv:1806.05662</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1806.05662">pdf</a>, <a href="https://arxiv.org/format/1806.05662">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> GLoMo: Unsupervisedly Learned Relational Graphs as Transferable Representations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Z">Zhilin Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+J">Jake Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+K">Kaiming He</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Salakhutdinov%2C+R">Ruslan Salakhutdinov</a>, <a href="/search/cs?searchtype=author&amp;query=LeCun%2C+Y">Yann LeCun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1806.05662v3-abstract-short" style="display: inline;"> Modern deep transfer learning approaches have mainly focused on learning generic feature vectors from one task that are transferable to other tasks, such as word embeddings in language and pretrained convolutional features in vision. However, these approaches usually transfer unary features and largely ignore more structured graphical representations. This work explores the possibility of learning&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1806.05662v3-abstract-full').style.display = 'inline'; document.getElementById('1806.05662v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1806.05662v3-abstract-full" style="display: none;"> Modern deep transfer learning approaches have mainly focused on learning generic feature vectors from one task that are transferable to other tasks, such as word embeddings in language and pretrained convolutional features in vision. However, these approaches usually transfer unary features and largely ignore more structured graphical representations. This work explores the possibility of learning generic latent relational graphs that capture dependencies between pairs of data units (e.g., words or pixels) from large-scale unlabeled data and transferring the graphs to downstream tasks. Our proposed transfer learning framework improves performance on various tasks including question answering, natural language inference, sentiment analysis, and image classification. We also show that the learned graphs are generic enough to be transferred to different embeddings on which the graphs have not been trained (including GloVe embeddings, ELMo embeddings, and task-specific RNN hidden unit), or embedding-free units such as image pixels. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1806.05662v3-abstract-full').style.display = 'none'; document.getElementById('1806.05662v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 July, 2018; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 June, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2018. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1806.04313">arXiv:1806.04313</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1806.04313">pdf</a>, <a href="https://arxiv.org/format/1806.04313">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Embedding Text in Hyperbolic Spaces </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Shallue%2C+C+J">Christopher J. Shallue</a>, <a href="/search/cs?searchtype=author&amp;query=Norouzi%2C+M">Mohammad Norouzi</a>, <a href="/search/cs?searchtype=author&amp;query=Dai%2C+A+M">Andrew M. Dai</a>, <a href="/search/cs?searchtype=author&amp;query=Dahl%2C+G+E">George E. Dahl</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1806.04313v1-abstract-short" style="display: inline;"> Natural language text exhibits hierarchical structure in a variety of respects. Ideally, we could incorporate our prior knowledge of this hierarchical structure into unsupervised learning algorithms that work on text data. Recent work by Nickel &amp; Kiela (2017) proposed using hyperbolic instead of Euclidean embedding spaces to represent hierarchical data and demonstrated encouraging results when emb&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1806.04313v1-abstract-full').style.display = 'inline'; document.getElementById('1806.04313v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1806.04313v1-abstract-full" style="display: none;"> Natural language text exhibits hierarchical structure in a variety of respects. Ideally, we could incorporate our prior knowledge of this hierarchical structure into unsupervised learning algorithms that work on text data. Recent work by Nickel &amp; Kiela (2017) proposed using hyperbolic instead of Euclidean embedding spaces to represent hierarchical data and demonstrated encouraging results when embedding graphs. In this work, we extend their method with a re-parameterization technique that allows us to learn hyperbolic embeddings of arbitrarily parameterized objects. We apply this framework to learn word and sentence embeddings in hyperbolic space in an unsupervised manner from text corpora. The resulting embeddings seem to encode certain intuitive notions of hierarchy, such as word-context frequency and phrase constituency. However, the implicit continuous hierarchy in the learned hyperbolic space makes interrogating the model&#39;s learned hierarchies more difficult than for models that learn explicit edges between items. The learned hyperbolic embeddings show improvements over Euclidean embeddings in some -- but not all -- downstream tasks, suggesting that hierarchical organization is more useful for some tasks than others. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1806.04313v1-abstract-full').style.display = 'none'; document.getElementById('1806.04313v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 June, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">TextGraphs 2018</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1804.05922">arXiv:1804.05922</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1804.05922">pdf</a>, <a href="https://arxiv.org/format/1804.05922">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Neural Models for Reasoning over Multiple Mentions using Coreference </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+Q">Qiao Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Z">Zhilin Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Salakhutdinov%2C+R">Ruslan Salakhutdinov</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1804.05922v1-abstract-short" style="display: inline;"> Many problems in NLP require aggregating information from multiple mentions of the same entity which may be far apart in the text. Existing Recurrent Neural Network (RNN) layers are biased towards short-term dependencies and hence not suited to such tasks. We present a recurrent layer which is instead biased towards coreferent dependencies. The layer uses coreference annotations extracted from an&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1804.05922v1-abstract-full').style.display = 'inline'; document.getElementById('1804.05922v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1804.05922v1-abstract-full" style="display: none;"> Many problems in NLP require aggregating information from multiple mentions of the same entity which may be far apart in the text. Existing Recurrent Neural Network (RNN) layers are biased towards short-term dependencies and hence not suited to such tasks. We present a recurrent layer which is instead biased towards coreferent dependencies. The layer uses coreference annotations extracted from an external system to connect entity mentions belonging to the same cluster. Incorporating this layer into a state-of-the-art reading comprehension model improves performance on three datasets -- Wikihop, LAMBADA and the bAbi AI tasks -- with large gains when training data is scarce. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1804.05922v1-abstract-full').style.display = 'none'; document.getElementById('1804.05922v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 April, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NAACL 2018 (Short Paper)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1804.00720">arXiv:1804.00720</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1804.00720">pdf</a>, <a href="https://arxiv.org/format/1804.00720">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Simple and Effective Semi-Supervised Question Answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Pruthi%2C+D">Danish Pruthi</a>, <a href="/search/cs?searchtype=author&amp;query=Rajagopal%2C+D">Dheeraj Rajagopal</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1804.00720v1-abstract-short" style="display: inline;"> Recent success of deep learning models for the task of extractive Question Answering (QA) is hinged on the availability of large annotated corpora. However, large domain specific annotated corpora are limited and expensive to construct. In this work, we envision a system where the end user specifies a set of base documents and only a few labelled examples. Our system exploits the document structur&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1804.00720v1-abstract-full').style.display = 'inline'; document.getElementById('1804.00720v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1804.00720v1-abstract-full" style="display: none;"> Recent success of deep learning models for the task of extractive Question Answering (QA) is hinged on the availability of large annotated corpora. However, large domain specific annotated corpora are limited and expensive to construct. In this work, we envision a system where the end user specifies a set of base documents and only a few labelled examples. Our system exploits the document structure to create cloze-style questions from these base documents; pre-trains a powerful neural network on the cloze style questions; and further fine-tunes the model on the labeled examples. We evaluate our proposed system across three diverse datasets from different domains, and find it to be highly effective with very little labeled data. We attain more than 50% F1 score on SQuAD and TriviaQA with less than a thousand labelled examples. We are also releasing a set of 3.2M cloze-style questions for practitioners to use while building QA systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1804.00720v1-abstract-full').style.display = 'none'; document.getElementById('1804.00720v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 April, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Short paper, NAACL 2018</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1707.03904">arXiv:1707.03904</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1707.03904">pdf</a>, <a href="https://arxiv.org/format/1707.03904">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Quasar: Datasets for Question Answering by Search and Reading </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Mazaitis%2C+K">Kathryn Mazaitis</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1707.03904v2-abstract-short" style="display: inline;"> We present two new large-scale datasets aimed at evaluating systems designed to comprehend a natural language query and extract its answer from a large corpus of text. The Quasar-S dataset consists of 37000 cloze-style (fill-in-the-gap) queries constructed from definitions of software entity tags on the popular website Stack Overflow. The posts and comments on the website serve as the background c&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1707.03904v2-abstract-full').style.display = 'inline'; document.getElementById('1707.03904v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1707.03904v2-abstract-full" style="display: none;"> We present two new large-scale datasets aimed at evaluating systems designed to comprehend a natural language query and extract its answer from a large corpus of text. The Quasar-S dataset consists of 37000 cloze-style (fill-in-the-gap) queries constructed from definitions of software entity tags on the popular website Stack Overflow. The posts and comments on the website serve as the background corpus for answering the cloze questions. The Quasar-T dataset consists of 43000 open-domain trivia questions and their answers obtained from various internet sources. ClueWeb09 serves as the background corpus for extracting these answers. We pose these datasets as a challenge for two related subtasks of factoid Question Answering: (1) searching for relevant pieces of text that include the correct answer to a query, and (2) reading the retrieved text to answer the query. We also describe a retrieval system for extracting relevant sentences and documents from the corpus given a query, and include these in the release for researchers wishing to only focus on (2). We evaluate several baselines on both datasets, ranging from simple heuristics to powerful neural models, and show that these lag behind human performance by 16.4% and 32.1% for Quasar-S and -T respectively. The datasets are available at https://github.com/bdhingra/quasar . <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1707.03904v2-abstract-full').style.display = 'none'; document.getElementById('1707.03904v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 August, 2017; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 July, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2017. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1703.08885">arXiv:1703.08885</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1703.08885">pdf</a>, <a href="https://arxiv.org/format/1703.08885">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Question Answering from Unstructured Text by Retrieval and Comprehension </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Watanabe%2C+Y">Yusuke Watanabe</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Salakhutdinov%2C+R">Ruslan Salakhutdinov</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1703.08885v1-abstract-short" style="display: inline;"> Open domain Question Answering (QA) systems must interact with external knowledge sources, such as web pages, to find relevant information. Information sources like Wikipedia, however, are not well structured and difficult to utilize in comparison with Knowledge Bases (KBs). In this work we present a two-step approach to question answering from unstructured text, consisting of a retrieval step and&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1703.08885v1-abstract-full').style.display = 'inline'; document.getElementById('1703.08885v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1703.08885v1-abstract-full" style="display: none;"> Open domain Question Answering (QA) systems must interact with external knowledge sources, such as web pages, to find relevant information. Information sources like Wikipedia, however, are not well structured and difficult to utilize in comparison with Knowledge Bases (KBs). In this work we present a two-step approach to question answering from unstructured text, consisting of a retrieval step and a comprehension step. For comprehension, we present an RNN based attention model with a novel mixture mechanism for selecting answers from either retrieved articles or a fixed vocabulary. For retrieval we introduce a hand-crafted model and a neural model for ranking relevant articles. We achieve state-of-the-art performance on W IKI M OVIES dataset, reducing the error by 40%. Our experimental results further demonstrate the importance of each of the introduced components. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1703.08885v1-abstract-full').style.display = 'none'; document.getElementById('1703.08885v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 March, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2017. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1703.02620">arXiv:1703.02620</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1703.02620">pdf</a>, <a href="https://arxiv.org/format/1703.02620">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Linguistic Knowledge as Memory for Recurrent Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Z">Zhilin Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Salakhutdinov%2C+R">Ruslan Salakhutdinov</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1703.02620v1-abstract-short" style="display: inline;"> Training recurrent neural networks to model long term dependencies is difficult. Hence, we propose to use external linguistic knowledge as an explicit signal to inform the model which memories it should utilize. Specifically, external knowledge is used to augment a sequence with typed edges between arbitrarily distant elements, and the resulting graph is decomposed into directed acyclic subgraphs.&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1703.02620v1-abstract-full').style.display = 'inline'; document.getElementById('1703.02620v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1703.02620v1-abstract-full" style="display: none;"> Training recurrent neural networks to model long term dependencies is difficult. Hence, we propose to use external linguistic knowledge as an explicit signal to inform the model which memories it should utilize. Specifically, external knowledge is used to augment a sequence with typed edges between arbitrarily distant elements, and the resulting graph is decomposed into directed acyclic subgraphs. We introduce a model that encodes such graphs as explicit memory in recurrent neural networks, and use it to model coreference relations in text. We apply our model to several text comprehension tasks and achieve new state-of-the-art results on all considered benchmarks, including CNN, bAbi, and LAMBADA. On the bAbi QA tasks, our model solves 15 out of the 20 tasks with only 1000 training examples per task. Analysis of the learned representations further demonstrates the ability of our model to encode fine-grained entity information across a document. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1703.02620v1-abstract-full').style.display = 'none'; document.getElementById('1703.02620v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 March, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2017. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1703.01557">arXiv:1703.01557</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1703.01557">pdf</a>, <a href="https://arxiv.org/format/1703.01557">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Using Graphs of Classifiers to Impose Declarative Constraints on Semi-supervised Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Bing%2C+L">Lidong Bing</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a>, <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1703.01557v2-abstract-short" style="display: inline;"> We propose a general approach to modeling semi-supervised learning (SSL) algorithms. Specifically, we present a declarative language for modeling both traditional supervised classification tasks and many SSL heuristics, including both well-known heuristics such as co-training and novel domain-specific heuristics. In addition to representing individual SSL heuristics, we show that multiple heuristi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1703.01557v2-abstract-full').style.display = 'inline'; document.getElementById('1703.01557v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1703.01557v2-abstract-full" style="display: none;"> We propose a general approach to modeling semi-supervised learning (SSL) algorithms. Specifically, we present a declarative language for modeling both traditional supervised classification tasks and many SSL heuristics, including both well-known heuristics such as co-training and novel domain-specific heuristics. In addition to representing individual SSL heuristics, we show that multiple heuristics can be automatically combined using Bayesian optimization methods. We experiment with two classes of tasks, link-based text classification and relation extraction. We show modest improvements on well-studied link-based classification benchmarks, and state-of-the-art results on relation-extraction tasks for two realistic domains. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1703.01557v2-abstract-full').style.display = 'none'; document.getElementById('1703.01557v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 March, 2017; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 March, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2017. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1703.00993">arXiv:1703.00993</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1703.00993">pdf</a>, <a href="https://arxiv.org/format/1703.00993">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> A Comparative Study of Word Embeddings for Reading Comprehension </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dhingra%2C+B">Bhuwan Dhingra</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+H">Hanxiao Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Salakhutdinov%2C+R">Ruslan Salakhutdinov</a>, <a href="/search/cs?searchtype=author&amp;query=Cohen%2C+W+W">William W. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1703.00993v1-abstract-short" style="display: inline;"> The focus of past machine learning research for Reading Comprehension tasks has been primarily on the design of novel deep learning architectures. Here we show that seemingly minor choices made on (1) the use of pre-trained word embeddings, and (2) the representation of out-of-vocabulary tokens at test time, can turn out to have a larger impact than architectural choices on the final performance.&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1703.00993v1-abstract-full').style.display = 'inline'; document.getElementById('1703.00993v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1703.00993v1-abstract-full" style="display: none;"> The focus of past machine learning research for Reading Comprehension tasks has been primarily on the design of novel deep learning architectures. Here we show that seemingly minor choices made on (1) the use of pre-trained word embeddings, and (2) the representation of out-of-vocabulary tokens at test time, can turn out to have a larger impact than architectural choices on the final performance. We systematically explore several options for these choices, and provide recommendations to researchers working in this area. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1703.00993v1-abstract-full').style.display = 'none'; document.getElementById('1703.00993v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 March, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2017. </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Dhingra%2C+B&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Dhingra%2C+B&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Dhingra%2C+B&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10