Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 148 results for author: <span class="mathjax">Goldberg, Y</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Goldberg%2C+Y">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Goldberg, Y"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Goldberg%2C+Y&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Goldberg, Y"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Goldberg%2C+Y&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Goldberg%2C+Y&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Goldberg%2C+Y&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Goldberg%2C+Y&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.08869">arXiv:2501.08869</a> <span> [<a href="https://arxiv.org/pdf/2501.08869">pdf</a>, <a href="https://arxiv.org/format/2501.08869">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Silent Abandonment in Text-Based Contact Centers: Identifying, Quantifying, and Mitigating its Operational Impacts </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Castellanos%2C+A">Antonio Castellanos</a>, <a href="/search/cs?searchtype=author&query=Yom-Tov%2C+G+B">Galit B. Yom-Tov</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yair Goldberg</a>, <a href="/search/cs?searchtype=author&query=Park%2C+J">Jaeyoung Park</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.08869v2-abstract-short" style="display: inline;"> In the quest to improve services, companies offer customers the option to interact with agents via texting. Such contact centers face unique challenges compared to traditional call centers, as measuring customer experience proxies like abandonment and patience involves uncertainty. A key source of this uncertainty is silent abandonment, where customers leave without notifying the system, wasting a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.08869v2-abstract-full').style.display = 'inline'; document.getElementById('2501.08869v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.08869v2-abstract-full" style="display: none;"> In the quest to improve services, companies offer customers the option to interact with agents via texting. Such contact centers face unique challenges compared to traditional call centers, as measuring customer experience proxies like abandonment and patience involves uncertainty. A key source of this uncertainty is silent abandonment, where customers leave without notifying the system, wasting agent time and leaving their status unclear. Silent abandonment also obscures whether a customer was served or left. Our goals are to measure the magnitude of silent abandonment and mitigate its effects. Classification models show that 3%-70% of customers across 17 companies abandon silently. In one study, 71.3% of abandoning customers did so silently, reducing agent efficiency by 3.2% and system capacity by 15.3%, incurring $5,457 in annual costs per agent. We develop an expectation-maximization (EM) algorithm to estimate customer patience under uncertainty and identify influencing covariates. We find that companies should use classification models to estimate abandonment scope and our EM algorithm to assess patience. We suggest strategies to operationally mitigate the impact of silent abandonment by predicting suspected silent-abandonment behavior or changing service design. Specifically, we show that while allowing customers to write while waiting in the queue creates a missing data challenge, it also significantly increases patience and reduces service time, leading to reduced abandonment and lower staffing requirements. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.08869v2-abstract-full').style.display = 'none'; document.getElementById('2501.08869v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">75% of the paper is an updated version of arXiv:2304.11754</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.05434">arXiv:2412.05434</a> <span> [<a href="https://arxiv.org/pdf/2412.05434">pdf</a>, <a href="https://arxiv.org/format/2412.05434">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Diversity Over Quantity: A Lesson From Few Shot Relation Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cohen%2C+A+D">Amir DN Cohen</a>, <a href="/search/cs?searchtype=author&query=Ravfogel%2C+S">Shauli Ravfogel</a>, <a href="/search/cs?searchtype=author&query=Shmidman%2C+S">Shaltiel Shmidman</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.05434v1-abstract-short" style="display: inline;"> In few-shot relation classification (FSRC), models must generalize to novel relations with only a few labeled examples. While much of the recent progress in NLP has focused on scaling data size, we argue that diversity in relation types is more crucial for FSRC performance. In this work, we demonstrate that training on a diverse set of relations significantly enhances a model's ability to generali… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.05434v1-abstract-full').style.display = 'inline'; document.getElementById('2412.05434v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.05434v1-abstract-full" style="display: none;"> In few-shot relation classification (FSRC), models must generalize to novel relations with only a few labeled examples. While much of the recent progress in NLP has focused on scaling data size, we argue that diversity in relation types is more crucial for FSRC performance. In this work, we demonstrate that training on a diverse set of relations significantly enhances a model's ability to generalize to unseen relations, even when the overall dataset size remains fixed. We introduce REBEL-FS, a new FSRC benchmark that incorporates an order of magnitude more relation types than existing datasets. Through systematic experiments, we show that increasing the diversity of relation types in the training data leads to consistent gains in performance across various few-shot learning scenarios, including high-negative settings. Our findings challenge the common assumption that more data alone leads to better performance and suggest that targeted data curation focused on diversity can substantially reduce the need for large-scale datasets in FSRC. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.05434v1-abstract-full').style.display = 'none'; document.getElementById('2412.05434v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.22592">arXiv:2410.22592</a> <span> [<a href="https://arxiv.org/pdf/2410.22592">pdf</a>, <a href="https://arxiv.org/format/2410.22592">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> GRADE: Quantifying Sample Diversity in Text-to-Image Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Rassin%2C+R">Royi Rassin</a>, <a href="/search/cs?searchtype=author&query=Slobodkin%2C+A">Aviv Slobodkin</a>, <a href="/search/cs?searchtype=author&query=Ravfogel%2C+S">Shauli Ravfogel</a>, <a href="/search/cs?searchtype=author&query=Elazar%2C+Y">Yanai Elazar</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.22592v1-abstract-short" style="display: inline;"> Text-to-image (T2I) models are remarkable at generating realistic images based on textual descriptions. However, textual prompts are inherently underspecified: they do not specify all possible attributes of the required image. This raises two key questions: Do T2I models generate diverse outputs on underspecified prompts? How can we automatically measure diversity? We propose GRADE: Granular Attri… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22592v1-abstract-full').style.display = 'inline'; document.getElementById('2410.22592v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.22592v1-abstract-full" style="display: none;"> Text-to-image (T2I) models are remarkable at generating realistic images based on textual descriptions. However, textual prompts are inherently underspecified: they do not specify all possible attributes of the required image. This raises two key questions: Do T2I models generate diverse outputs on underspecified prompts? How can we automatically measure diversity? We propose GRADE: Granular Attribute Diversity Evaluation, an automatic method for quantifying sample diversity. GRADE leverages the world knowledge embedded in large language models and visual question-answering systems to identify relevant concept-specific axes of diversity (e.g., ``shape'' and ``color'' for the concept ``cookie''). It then estimates frequency distributions of concepts and their attributes and quantifies diversity using (normalized) entropy. GRADE achieves over 90% human agreement while exhibiting weak correlation to commonly used diversity metrics. We use GRADE to measure the overall diversity of 12 T2I models using 400 concept-attribute pairs, revealing that all models display limited variation. Further, we find that these models often exhibit default behaviors, a phenomenon where the model consistently generates concepts with the same attributes (e.g., 98% of the cookies are round). Finally, we demonstrate that a key reason for low diversity is due to underspecified captions in training data. Our work proposes a modern, semantically-driven approach to measure sample diversity and highlights the stunning homogeneity in outputs by T2I models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22592v1-abstract-full').style.display = 'none'; document.getElementById('2410.22592v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">For project page and code see https://royira.github.io/GRADE</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.17051">arXiv:2410.17051</a> <span> [<a href="https://arxiv.org/pdf/2410.17051">pdf</a>, <a href="https://arxiv.org/format/2410.17051">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Data-driven Coreference-based Ontology Building </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ashury-Tahan%2C+S">Shir Ashury-Tahan</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+A+D+N">Amir David Nissan Cohen</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+N">Nadav Cohen</a>, <a href="/search/cs?searchtype=author&query=Louzoun%2C+Y">Yoram Louzoun</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.17051v1-abstract-short" style="display: inline;"> While coreference resolution is traditionally used as a component in individual document understanding, in this work we take a more global view and explore what can we learn about a domain from the set of all document-level coreference relations that are present in a large corpus. We derive coreference chains from a corpus of 30 million biomedical abstracts and construct a graph based on the strin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17051v1-abstract-full').style.display = 'inline'; document.getElementById('2410.17051v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.17051v1-abstract-full" style="display: none;"> While coreference resolution is traditionally used as a component in individual document understanding, in this work we take a more global view and explore what can we learn about a domain from the set of all document-level coreference relations that are present in a large corpus. We derive coreference chains from a corpus of 30 million biomedical abstracts and construct a graph based on the string phrases within these chains, establishing connections between phrases if they co-occur within the same coreference chain. We then use the graph structure and the betweeness centrality measure to distinguish between edges denoting hierarchy, identity and noise, assign directionality to edges denoting hierarchy, and split nodes (strings) that correspond to multiple distinct concepts. The result is a rich, data-driven ontology over concepts in the biomedical domain, parts of which overlaps significantly with human-authored ontologies. We release the coreference chains and resulting ontology under a creative-commons license, along with the code. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17051v1-abstract-full').style.display = 'none'; document.getElementById('2410.17051v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> EMNLP 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.15836">arXiv:2408.15836</a> <span> [<a href="https://arxiv.org/pdf/2408.15836">pdf</a>, <a href="https://arxiv.org/format/2408.15836">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Knowledge Navigator: LLM-guided Browsing Framework for Exploratory Search in Scientific Literature </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Katz%2C+U">Uri Katz</a>, <a href="/search/cs?searchtype=author&query=Levy%2C+M">Mosh Levy</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.15836v1-abstract-short" style="display: inline;"> The exponential growth of scientific literature necessitates advanced tools for effective knowledge exploration. We present Knowledge Navigator, a system designed to enhance exploratory search abilities by organizing and structuring the retrieved documents from broad topical queries into a navigable, two-level hierarchy of named and descriptive scientific topics and subtopics. This structured orga… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.15836v1-abstract-full').style.display = 'inline'; document.getElementById('2408.15836v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.15836v1-abstract-full" style="display: none;"> The exponential growth of scientific literature necessitates advanced tools for effective knowledge exploration. We present Knowledge Navigator, a system designed to enhance exploratory search abilities by organizing and structuring the retrieved documents from broad topical queries into a navigable, two-level hierarchy of named and descriptive scientific topics and subtopics. This structured organization provides an overall view of the research themes in a domain, while also enabling iterative search and deeper knowledge discovery within specific subtopics by allowing users to refine their focus and retrieve additional relevant documents. Knowledge Navigator combines LLM capabilities with cluster-based methods to enable an effective browsing method. We demonstrate our approach's effectiveness through automatic and manual evaluations on two novel benchmarks, CLUSTREC-COVID and SCITOC. Our code, prompts, and benchmarks are made publicly available. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.15836v1-abstract-full').style.display = 'none'; document.getElementById('2408.15836v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.21530">arXiv:2407.21530</a> <span> [<a href="https://arxiv.org/pdf/2407.21530">pdf</a>, <a href="https://arxiv.org/format/2407.21530">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Data Contamination Report from the 2024 CONDA Shared Task </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sainz%2C+O">Oscar Sainz</a>, <a href="/search/cs?searchtype=author&query=Garc%C3%ADa-Ferrero%2C+I">Iker Garc铆a-Ferrero</a>, <a href="/search/cs?searchtype=author&query=Jacovi%2C+A">Alon Jacovi</a>, <a href="/search/cs?searchtype=author&query=Campos%2C+J+A">Jon Ander Campos</a>, <a href="/search/cs?searchtype=author&query=Elazar%2C+Y">Yanai Elazar</a>, <a href="/search/cs?searchtype=author&query=Agirre%2C+E">Eneko Agirre</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+W">Wei-Lin Chen</a>, <a href="/search/cs?searchtype=author&query=Chim%2C+J">Jenny Chim</a>, <a href="/search/cs?searchtype=author&query=Choshen%2C+L">Leshem Choshen</a>, <a href="/search/cs?searchtype=author&query=D%27Amico-Wong%2C+L">Luca D'Amico-Wong</a>, <a href="/search/cs?searchtype=author&query=Dell%2C+M">Melissa Dell</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+R">Run-Ze Fan</a>, <a href="/search/cs?searchtype=author&query=Golchin%2C+S">Shahriar Golchin</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yucheng Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+P">Pengfei Liu</a>, <a href="/search/cs?searchtype=author&query=Pahwa%2C+B">Bhavish Pahwa</a>, <a href="/search/cs?searchtype=author&query=Prabhu%2C+A">Ameya Prabhu</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+S">Suryansh Sharma</a>, <a href="/search/cs?searchtype=author&query=Silcock%2C+E">Emily Silcock</a>, <a href="/search/cs?searchtype=author&query=Solonko%2C+K">Kateryna Solonko</a>, <a href="/search/cs?searchtype=author&query=Stap%2C+D">David Stap</a>, <a href="/search/cs?searchtype=author&query=Surdeanu%2C+M">Mihai Surdeanu</a>, <a href="/search/cs?searchtype=author&query=Tseng%2C+Y">Yu-Min Tseng</a>, <a href="/search/cs?searchtype=author&query=Udandarao%2C+V">Vishaal Udandarao</a> , et al. (3 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.21530v2-abstract-short" style="display: inline;"> The 1st Workshop on Data Contamination (CONDA 2024) focuses on all relevant aspects of data contamination in natural language processing, where data contamination is understood as situations where evaluation data is included in pre-training corpora used to train large scale models, compromising evaluation results. The workshop fostered a shared task to collect evidence on data contamination in cur… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.21530v2-abstract-full').style.display = 'inline'; document.getElementById('2407.21530v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.21530v2-abstract-full" style="display: none;"> The 1st Workshop on Data Contamination (CONDA 2024) focuses on all relevant aspects of data contamination in natural language processing, where data contamination is understood as situations where evaluation data is included in pre-training corpora used to train large scale models, compromising evaluation results. The workshop fostered a shared task to collect evidence on data contamination in current available datasets and models. The goal of the shared task and associated database is to assist the community in understanding the extent of the problem and to assist researchers in avoiding reporting evaluation results on known contaminated resources. The shared task provides a structured, centralized public database for the collection of contamination evidence, open to contributions from the community via GitHub pool requests. This first compilation paper is based on 566 reported entries over 91 contaminated sources from a total of 23 contributors. The details of the individual contamination events are available in the platform. The platform continues to be online, open to contributions from the community. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.21530v2-abstract-full').style.display = 'none'; document.getElementById('2407.21530v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">https://huggingface.co/spaces/CONDA-Workshop/Data-Contamination-Database</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.10626">arXiv:2407.10626</a> <span> [<a href="https://arxiv.org/pdf/2407.10626">pdf</a>, <a href="https://arxiv.org/format/2407.10626">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> NoviCode: Generating Programs from Natural Language Utterances by Novices </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mordechai%2C+A+A">Asaf Achi Mordechai</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a>, <a href="/search/cs?searchtype=author&query=Tsarfaty%2C+R">Reut Tsarfaty</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.10626v2-abstract-short" style="display: inline;"> Current Text-to-Code models demonstrate impressive capabilities in generating executable code from natural language snippets. However, current studies focus on technical instructions and programmer-oriented language, and it is an open question whether these models can effectively translate natural language descriptions given by non-technical users and express complex goals, to an executable progra… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.10626v2-abstract-full').style.display = 'inline'; document.getElementById('2407.10626v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.10626v2-abstract-full" style="display: none;"> Current Text-to-Code models demonstrate impressive capabilities in generating executable code from natural language snippets. However, current studies focus on technical instructions and programmer-oriented language, and it is an open question whether these models can effectively translate natural language descriptions given by non-technical users and express complex goals, to an executable program that contains an intricate flow - composed of API access and control structures as loops, conditions, and sequences. To unlock the challenge of generating a complete program from a plain non-technical description we present NoviCode, a novel NL Programming task, which takes as input an API and a natural language description by a novice non-programmer and provides an executable program as output. To assess the efficacy of models on this task, we provide a novel benchmark accompanied by test suites wherein the generated program code is assessed not according to their form, but according to their functional execution. Our experiments show that, first, NoviCode is indeed a challenging task in the code synthesis domain, and that generating complex code from non-technical instructions goes beyond the current Text-to-Code paradigm. Second, we show that a novel approach wherein we align the NL utterances with the compositional hierarchical structure of the code, greatly enhances the performance of LLMs on this task, compared with the end-to-end Text-to-Code counterparts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.10626v2-abstract-full').style.display = 'none'; document.getElementById('2407.10626v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.16048">arXiv:2406.16048</a> <span> [<a href="https://arxiv.org/pdf/2406.16048">pdf</a>, <a href="https://arxiv.org/format/2406.16048">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Evaluating D-MERIT of Partial-annotation on Information Retrieval </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Rassin%2C+R">Royi Rassin</a>, <a href="/search/cs?searchtype=author&query=Fairstein%2C+Y">Yaron Fairstein</a>, <a href="/search/cs?searchtype=author&query=Kalinsky%2C+O">Oren Kalinsky</a>, <a href="/search/cs?searchtype=author&query=Kushilevitz%2C+G">Guy Kushilevitz</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+N">Nachshon Cohen</a>, <a href="/search/cs?searchtype=author&query=Libov%2C+A">Alexander Libov</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.16048v2-abstract-short" style="display: inline;"> Retrieval models are often evaluated on partially-annotated datasets. Each query is mapped to a few relevant texts and the remaining corpus is assumed to be irrelevant. As a result, models that successfully retrieve false negatives are punished in evaluation. Unfortunately, completely annotating all texts for every query is not resource efficient. In this work, we show that using partially-annotat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.16048v2-abstract-full').style.display = 'inline'; document.getElementById('2406.16048v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.16048v2-abstract-full" style="display: none;"> Retrieval models are often evaluated on partially-annotated datasets. Each query is mapped to a few relevant texts and the remaining corpus is assumed to be irrelevant. As a result, models that successfully retrieve false negatives are punished in evaluation. Unfortunately, completely annotating all texts for every query is not resource efficient. In this work, we show that using partially-annotated datasets in evaluation can paint a distorted picture. We curate D-MERIT, a passage retrieval evaluation set from Wikipedia, aspiring to contain all relevant passages for each query. Queries describe a group (e.g., "journals about linguistics") and relevant passages are evidence that entities belong to the group (e.g., a passage indicating that "Language" is a journal about linguistics). We show that evaluating on a dataset containing annotations for only a subset of the relevant passages might result in misleading ranking of the retrieval systems and that as more relevant texts are included in the evaluation set, the rankings converge. We propose our dataset as a resource for evaluation and our study as a recommendation for balance between resource-efficiency and reliable evaluation when annotating evaluation sets for text retrieval. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.16048v2-abstract-full').style.display = 'none'; document.getElementById('2406.16048v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to EMNLP 2024 main track. Our dataset can be downloaded from https://D-MERIT.github.io</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.06283">arXiv:2404.06283</a> <span> [<a href="https://arxiv.org/pdf/2404.06283">pdf</a>, <a href="https://arxiv.org/format/2404.06283">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> LLMs' Reading Comprehension Is Affected by Parametric Knowledge and Struggles with Hypothetical Statements </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Basmov%2C+V">Victoria Basmov</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a>, <a href="/search/cs?searchtype=author&query=Tsarfaty%2C+R">Reut Tsarfaty</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.06283v1-abstract-short" style="display: inline;"> The task of reading comprehension (RC), often implemented as context-based question answering (QA), provides a primary means to assess language models' natural language understanding (NLU) capabilities. Yet, when applied to large language models (LLMs) with extensive built-in world knowledge, this method can be deceptive. If the context aligns with the LLMs' internal knowledge, it is hard to disce… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.06283v1-abstract-full').style.display = 'inline'; document.getElementById('2404.06283v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.06283v1-abstract-full" style="display: none;"> The task of reading comprehension (RC), often implemented as context-based question answering (QA), provides a primary means to assess language models' natural language understanding (NLU) capabilities. Yet, when applied to large language models (LLMs) with extensive built-in world knowledge, this method can be deceptive. If the context aligns with the LLMs' internal knowledge, it is hard to discern whether the models' answers stem from context comprehension or from LLMs' internal information. Conversely, using data that conflicts with the models' knowledge creates erroneous trends which distort the results. To address this issue, we suggest to use RC on imaginary data, based on fictitious facts and entities. This task is entirely independent of the models' world knowledge, enabling us to evaluate LLMs' linguistic abilities without the interference of parametric knowledge. Testing ChatGPT, GPT-4, LLaMA 2 and Mixtral on such imaginary data, we uncover a class of linguistic phenomena posing a challenge to current LLMs, involving thinking in terms of alternative, hypothetical scenarios. While all the models handle simple affirmative and negative contexts with high accuracy, they are much more prone to error when dealing with modal and conditional contexts. Crucially, these phenomena also trigger the LLMs' vulnerability to knowledge-conflicts again. In particular, while some models prove virtually unaffected by knowledge conflicts in affirmative and negative contexts, when faced with more semantically involved modal and conditional environments, they often fail to separate the text from their internal knowledge. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.06283v1-abstract-full').style.display = 'none'; document.getElementById('2404.06283v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.15827">arXiv:2403.15827</a> <span> [<a href="https://arxiv.org/pdf/2403.15827">pdf</a>, <a href="https://arxiv.org/ps/2403.15827">ps</a>, <a href="https://arxiv.org/format/2403.15827">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Permutation Recovery Problem against Deletion Errors for DNA Data Storage </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Singhvi%2C+S">Shubhransh Singhvi</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+C">Charchit Gupta</a>, <a href="/search/cs?searchtype=author&query=Boruchovsky%2C+A">Avital Boruchovsky</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yuval Goldberg</a>, <a href="/search/cs?searchtype=author&query=Kiah%2C+H+M">Han Mao Kiah</a>, <a href="/search/cs?searchtype=author&query=Yaakobi%2C+E">Eitan Yaakobi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.15827v1-abstract-short" style="display: inline;"> Owing to its immense storage density and durability, DNA has emerged as a promising storage medium. However, due to technological constraints, data can only be written onto many short DNA molecules called data blocks that are stored in an unordered way. To handle the unordered nature of DNA data storage systems, a unique address is typically prepended to each data block to form a DNA strand. Howev… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.15827v1-abstract-full').style.display = 'inline'; document.getElementById('2403.15827v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.15827v1-abstract-full" style="display: none;"> Owing to its immense storage density and durability, DNA has emerged as a promising storage medium. However, due to technological constraints, data can only be written onto many short DNA molecules called data blocks that are stored in an unordered way. To handle the unordered nature of DNA data storage systems, a unique address is typically prepended to each data block to form a DNA strand. However, DNA storage systems are prone to errors and generate multiple noisy copies of each strand called DNA reads. Thus, we study the permutation recovery problem against deletions errors for DNA data storage. The permutation recovery problem for DNA data storage requires one to reconstruct the addresses or in other words to uniquely identify the noisy reads. By successfully reconstructing the addresses, one can essentially determine the correct order of the data blocks, effectively solving the clustering problem. We first show that we can almost surely identify all the noisy reads under certain mild assumptions. We then propose a permutation recovery procedure and analyze its complexity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.15827v1-abstract-full').style.display = 'none'; document.getElementById('2403.15827v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: substantial text overlap with arXiv:2305.04597</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.14848">arXiv:2402.14848</a> <span> [<a href="https://arxiv.org/pdf/2402.14848">pdf</a>, <a href="https://arxiv.org/format/2402.14848">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Same Task, More Tokens: the Impact of Input Length on the Reasoning Performance of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Levy%2C+M">Mosh Levy</a>, <a href="/search/cs?searchtype=author&query=Jacoby%2C+A">Alon Jacoby</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.14848v2-abstract-short" style="display: inline;"> This paper explores the impact of extending input lengths on the capabilities of Large Language Models (LLMs). Despite LLMs advancements in recent times, their performance consistency across different input lengths is not well understood. We investigate this aspect by introducing a novel QA reasoning framework, specifically designed to assess the impact of input length. We isolate the effect of in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.14848v2-abstract-full').style.display = 'inline'; document.getElementById('2402.14848v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.14848v2-abstract-full" style="display: none;"> This paper explores the impact of extending input lengths on the capabilities of Large Language Models (LLMs). Despite LLMs advancements in recent times, their performance consistency across different input lengths is not well understood. We investigate this aspect by introducing a novel QA reasoning framework, specifically designed to assess the impact of input length. We isolate the effect of input length using multiple versions of the same sample, each being extended with padding of different lengths, types and locations. Our findings show a notable degradation in LLMs' reasoning performance at much shorter input lengths than their technical maximum. We show that the degradation trend appears in every version of our dataset, although at different intensities. Additionally, our study reveals that the traditional metric of next word prediction correlates negatively with performance of LLMs' on our reasoning dataset. We analyse our results and identify failure modes that can serve as useful guides for future research, potentially informing strategies to address the limitations observed in LLMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.14848v2-abstract-full').style.display = 'none'; document.getElementById('2402.14848v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ACL 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.13906">arXiv:2402.13906</a> <span> [<a href="https://arxiv.org/pdf/2402.13906">pdf</a>, <a href="https://arxiv.org/format/2402.13906">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Leveraging Collection-Wide Similarities for Unsupervised Document Structure Extraction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lior%2C+G">Gili Lior</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a>, <a href="/search/cs?searchtype=author&query=Stanovsky%2C+G">Gabriel Stanovsky</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.13906v2-abstract-short" style="display: inline;"> Document collections of various domains, e.g., legal, medical, or financial, often share some underlying collection-wide structure, which captures information that can aid both human users and structure-aware models. We propose to identify the typical structure of document within a collection, which requires to capture recurring topics across the collection, while abstracting over arbitrary header… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.13906v2-abstract-full').style.display = 'inline'; document.getElementById('2402.13906v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.13906v2-abstract-full" style="display: none;"> Document collections of various domains, e.g., legal, medical, or financial, often share some underlying collection-wide structure, which captures information that can aid both human users and structure-aware models. We propose to identify the typical structure of document within a collection, which requires to capture recurring topics across the collection, while abstracting over arbitrary header paraphrases, and ground each topic to respective document locations. These requirements pose several challenges: headers that mark recurring topics frequently differ in phrasing, certain section headers are unique to individual documents and do not reflect the typical structure, and the order of topics can vary between documents. Subsequently, we develop an unsupervised graph-based method which leverages both inter- and intra-document similarities, to extract the underlying collection-wide structure. Our evaluations on three diverse domains in both English and Hebrew indicate that our method extracts meaningful collection-wide structure, and we hope that future work will leverage our method for multi-document applications and structure-aware models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.13906v2-abstract-full').style.display = 'none'; document.getElementById('2402.13906v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ACL 2024 findings</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.11355">arXiv:2402.11355</a> <span> [<a href="https://arxiv.org/pdf/2402.11355">pdf</a>, <a href="https://arxiv.org/format/2402.11355">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> A Practical Method for Generating String Counterfactuals </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Avitan%2C+M">Matan Avitan</a>, <a href="/search/cs?searchtype=author&query=Cotterell%2C+R">Ryan Cotterell</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a>, <a href="/search/cs?searchtype=author&query=Ravfogel%2C+S">Shauli Ravfogel</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.11355v5-abstract-short" style="display: inline;"> Interventions targeting the representation space of language models (LMs) have emerged as an effective means to influence model behavior. Such methods are employed, for example, to eliminate or alter the encoding of demographic information such as gender within the model's representations and, in so doing, create a counterfactual representation. However, because the intervention operates within th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.11355v5-abstract-full').style.display = 'inline'; document.getElementById('2402.11355v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.11355v5-abstract-full" style="display: none;"> Interventions targeting the representation space of language models (LMs) have emerged as an effective means to influence model behavior. Such methods are employed, for example, to eliminate or alter the encoding of demographic information such as gender within the model's representations and, in so doing, create a counterfactual representation. However, because the intervention operates within the representation space, understanding precisely what aspects of the text it modifies poses a challenge. In this paper, we give a method to convert representation counterfactuals into string counterfactuals. We demonstrate that this approach enables us to analyze the linguistic alterations corresponding to a given representation space intervention and to interpret the features utilized to encode a specific concept. Moreover, the resulting counterfactuals can be used to mitigate bias in classification through data augmentation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.11355v5-abstract-full').style.display = 'none'; document.getElementById('2402.11355v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.18360">arXiv:2310.18360</a> <span> [<a href="https://arxiv.org/pdf/2310.18360">pdf</a>, <a href="https://arxiv.org/format/2310.18360">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Guiding LLM to Fool Itself: Automatically Manipulating Machine Reading Comprehension Shortcut Triggers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Levy%2C+M">Mosh Levy</a>, <a href="/search/cs?searchtype=author&query=Ravfogel%2C+S">Shauli Ravfogel</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.18360v1-abstract-short" style="display: inline;"> Recent applications of LLMs in Machine Reading Comprehension (MRC) systems have shown impressive results, but the use of shortcuts, mechanisms triggered by features spuriously correlated to the true label, has emerged as a potential threat to their reliability. We analyze the problem from two angles: LLMs as editors, guided to edit text to mislead LLMs; and LLMs as readers, who answer questions ba… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.18360v1-abstract-full').style.display = 'inline'; document.getElementById('2310.18360v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.18360v1-abstract-full" style="display: none;"> Recent applications of LLMs in Machine Reading Comprehension (MRC) systems have shown impressive results, but the use of shortcuts, mechanisms triggered by features spuriously correlated to the true label, has emerged as a potential threat to their reliability. We analyze the problem from two angles: LLMs as editors, guided to edit text to mislead LLMs; and LLMs as readers, who answer questions based on the edited text. We introduce a framework that guides an editor to add potential shortcuts-triggers to samples. Using GPT4 as the editor, we find it can successfully edit trigger shortcut in samples that fool LLMs. Analysing LLMs as readers, we observe that even capable LLMs can be deceived using shortcut knowledge. Strikingly, we discover that GPT4 can be deceived by its own edits (15% drop in F1). Our findings highlight inherent vulnerabilities of LLMs to shortcut manipulations. We publish ShortcutQA, a curated dataset generated by our framework for future research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.18360v1-abstract-full').style.display = 'none'; document.getElementById('2310.18360v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to EMNLP 2023 Findings</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.14282">arXiv:2310.14282</a> <span> [<a href="https://arxiv.org/pdf/2310.14282">pdf</a>, <a href="https://arxiv.org/format/2310.14282">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> NERetrieve: Dataset for Next Generation Named Entity Recognition and Retrieval </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Katz%2C+U">Uri Katz</a>, <a href="/search/cs?searchtype=author&query=Vetzler%2C+M">Matan Vetzler</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+A+D">Amir DN Cohen</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.14282v1-abstract-short" style="display: inline;"> Recognizing entities in texts is a central need in many information-seeking scenarios, and indeed, Named Entity Recognition (NER) is arguably one of the most successful examples of a widely adopted NLP task and corresponding NLP technology. Recent advances in large language models (LLMs) appear to provide effective solutions (also) for NER tasks that were traditionally handled with dedicated model… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.14282v1-abstract-full').style.display = 'inline'; document.getElementById('2310.14282v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.14282v1-abstract-full" style="display: none;"> Recognizing entities in texts is a central need in many information-seeking scenarios, and indeed, Named Entity Recognition (NER) is arguably one of the most successful examples of a widely adopted NLP task and corresponding NLP technology. Recent advances in large language models (LLMs) appear to provide effective solutions (also) for NER tasks that were traditionally handled with dedicated models, often matching or surpassing the abilities of the dedicated models. Should NER be considered a solved problem? We argue to the contrary: the capabilities provided by LLMs are not the end of NER research, but rather an exciting beginning. They allow taking NER to the next level, tackling increasingly more useful, and increasingly more challenging, variants. We present three variants of the NER task, together with a dataset to support them. The first is a move towards more fine-grained -- and intersectional -- entity types. The second is a move towards zero-shot recognition and extraction of these fine-grained types based on entity-type labels. The third, and most challenging, is the move from the recognition setup to a novel retrieval setup, where the query is a zero-shot entity type, and the expected result is all the sentences from a large, pre-indexed corpus that contain entities of these types, and their corresponding spans. We show that all of these are far from being solved. We provide a large, silver-annotated corpus of 4 million paragraphs covering 500 entity types, to facilitate research towards all of these three goals. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.14282v1-abstract-full').style.display = 'none'; document.getElementById('2310.14282v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Findings of EMNLP 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.13960">arXiv:2310.13960</a> <span> [<a href="https://arxiv.org/pdf/2310.13960">pdf</a>, <a href="https://arxiv.org/format/2310.13960">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Linguistically Motivated Sign Language Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Moryossef%2C+A">Amit Moryossef</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Z">Zifan Jiang</a>, <a href="/search/cs?searchtype=author&query=M%C3%BCller%2C+M">Mathias M眉ller</a>, <a href="/search/cs?searchtype=author&query=Ebling%2C+S">Sarah Ebling</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.13960v2-abstract-short" style="display: inline;"> Sign language segmentation is a crucial task in sign language processing systems. It enables downstream tasks such as sign recognition, transcription, and machine translation. In this work, we consider two kinds of segmentation: segmentation into individual signs and segmentation into phrases, larger units comprising several signs. We propose a novel approach to jointly model these two tasks. Ou… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.13960v2-abstract-full').style.display = 'inline'; document.getElementById('2310.13960v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.13960v2-abstract-full" style="display: none;"> Sign language segmentation is a crucial task in sign language processing systems. It enables downstream tasks such as sign recognition, transcription, and machine translation. In this work, we consider two kinds of segmentation: segmentation into individual signs and segmentation into phrases, larger units comprising several signs. We propose a novel approach to jointly model these two tasks. Our method is motivated by linguistic cues observed in sign language corpora. We replace the predominant IO tagging scheme with BIO tagging to account for continuous signing. Given that prosody plays a significant role in phrase boundaries, we explore the use of optical flow features. We also provide an extensive analysis of hand shapes and 3D hand normalization. We find that introducing BIO tagging is necessary to model sign boundaries. Explicitly encoding prosody by optical flow improves segmentation in shallow models, but its contribution is negligible in deeper models. Careful tuning of the decoding algorithm atop the models further improves the segmentation quality. We demonstrate that our final models generalize to out-of-domain video content in a different signed language, even under a zero-shot setting. We observe that including optical flow and 3D hand normalization enhances the robustness of the model in this context. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.13960v2-abstract-full').style.display = 'none'; document.getElementById('2310.13960v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at EMNLP 2023 (Findings)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.10057">arXiv:2309.10057</a> <span> [<a href="https://arxiv.org/pdf/2309.10057">pdf</a>, <a href="https://arxiv.org/format/2309.10057">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.18653/v1/2023.acl-demo.27">10.18653/v1/2023.acl-demo.27 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Hierarchy Builder: Organizing Textual Spans into a Hierarchy to Facilitate Navigation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yair%2C+I">Itay Yair</a>, <a href="/search/cs?searchtype=author&query=Taub-Tabib%2C+H">Hillel Taub-Tabib</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.10057v1-abstract-short" style="display: inline;"> Information extraction systems often produce hundreds to thousands of strings on a specific topic. We present a method that facilitates better consumption of these strings, in an exploratory setting in which a user wants to both get a broad overview of what's available, and a chance to dive deeper on some aspects. The system works by grouping similar items together and arranging the remaining item… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.10057v1-abstract-full').style.display = 'inline'; document.getElementById('2309.10057v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.10057v1-abstract-full" style="display: none;"> Information extraction systems often produce hundreds to thousands of strings on a specific topic. We present a method that facilitates better consumption of these strings, in an exploratory setting in which a user wants to both get a broad overview of what's available, and a chance to dive deeper on some aspects. The system works by grouping similar items together and arranging the remaining items into a hierarchical navigable DAG structure. We apply the method to medical information extraction. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.10057v1-abstract-full').style.display = 'none'; document.getElementById('2309.10057v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages including citations; Presented at the ACL 2023 DEMO track, pages 282-290</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> H.3.1; H.3.3; H.5.3; I.2.7; E.1; I.2.4 </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations), Pages 282-290, Toronto, Canada, July 2023 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.13922">arXiv:2306.13922</a> <span> [<a href="https://arxiv.org/pdf/2306.13922">pdf</a>, <a href="https://arxiv.org/format/2306.13922">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Unsupervised Mapping of Arguments of Deverbal Nouns to Their Corresponding Verbal Labels </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Weinstein%2C+A">Aviv Weinstein</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.13922v1-abstract-short" style="display: inline;"> Deverbal nouns are nominal forms of verbs commonly used in written English texts to describe events or actions, as well as their arguments. However, many NLP systems, and in particular pattern-based ones, neglect to handle such nominalized constructions. The solutions that do exist for handling arguments of nominalized constructions are based on semantic annotation and require semantic ontologies,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.13922v1-abstract-full').style.display = 'inline'; document.getElementById('2306.13922v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.13922v1-abstract-full" style="display: none;"> Deverbal nouns are nominal forms of verbs commonly used in written English texts to describe events or actions, as well as their arguments. However, many NLP systems, and in particular pattern-based ones, neglect to handle such nominalized constructions. The solutions that do exist for handling arguments of nominalized constructions are based on semantic annotation and require semantic ontologies, making their applications restricted to a small set of nouns. We propose to adopt instead a more syntactic approach, which maps the arguments of deverbal nouns to the universal-dependency relations of the corresponding verbal construction. We present an unsupervised mechanism -- based on contextualized word representations -- which allows to enrich universal-dependency trees with dependency arcs denoting arguments of deverbal nouns, using the same labels as the corresponding verbal cases. By sharing the same label set as in the verbal case, patterns that were developed for verbs can be applied without modification but with high accuracy also to the nominal constructions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.13922v1-abstract-full').style.display = 'none'; document.getElementById('2306.13922v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to Findings of ACL 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.08877">arXiv:2306.08877</a> <span> [<a href="https://arxiv.org/pdf/2306.08877">pdf</a>, <a href="https://arxiv.org/format/2306.08877">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Linguistic Binding in Diffusion Models: Enhancing Attribute Correspondence through Attention Map Alignment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Rassin%2C+R">Royi Rassin</a>, <a href="/search/cs?searchtype=author&query=Hirsch%2C+E">Eran Hirsch</a>, <a href="/search/cs?searchtype=author&query=Glickman%2C+D">Daniel Glickman</a>, <a href="/search/cs?searchtype=author&query=Ravfogel%2C+S">Shauli Ravfogel</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a>, <a href="/search/cs?searchtype=author&query=Chechik%2C+G">Gal Chechik</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.08877v3-abstract-short" style="display: inline;"> Text-conditioned image generation models often generate incorrect associations between entities and their visual attributes. This reflects an impaired mapping between linguistic binding of entities and modifiers in the prompt and visual binding of the corresponding elements in the generated image. As one notable example, a query like "a pink sunflower and a yellow flamingo" may incorrectly produce… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.08877v3-abstract-full').style.display = 'inline'; document.getElementById('2306.08877v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.08877v3-abstract-full" style="display: none;"> Text-conditioned image generation models often generate incorrect associations between entities and their visual attributes. This reflects an impaired mapping between linguistic binding of entities and modifiers in the prompt and visual binding of the corresponding elements in the generated image. As one notable example, a query like "a pink sunflower and a yellow flamingo" may incorrectly produce an image of a yellow sunflower and a pink flamingo. To remedy this issue, we propose SynGen, an approach which first syntactically analyses the prompt to identify entities and their modifiers, and then uses a novel loss function that encourages the cross-attention maps to agree with the linguistic binding reflected by the syntax. Specifically, we encourage large overlap between attention maps of entities and their modifiers, and small overlap with other entities and modifier words. The loss is optimized during inference, without retraining or fine-tuning the model. Human evaluation on three datasets, including one new and challenging set, demonstrate significant improvements of SynGen compared with current state of the art methods. This work highlights how making use of sentence structure during inference can efficiently and substantially improve the faithfulness of text-to-image generation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.08877v3-abstract-full').style.display = 'none'; document.getElementById('2306.08877v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to NeurIPS 2023 (oral). Our code is publicly available at https://github.com/RoyiRa/Syntax-Guided-Generation</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.17714">arXiv:2305.17714</a> <span> [<a href="https://arxiv.org/pdf/2305.17714">pdf</a>, <a href="https://arxiv.org/format/2305.17714">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> An Open-Source Gloss-Based Baseline for Spoken to Signed Language Translation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Moryossef%2C+A">Amit Moryossef</a>, <a href="/search/cs?searchtype=author&query=M%C3%BCller%2C+M">Mathias M眉ller</a>, <a href="/search/cs?searchtype=author&query=G%C3%B6hring%2C+A">Anne G枚hring</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Z">Zifan Jiang</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a>, <a href="/search/cs?searchtype=author&query=Ebling%2C+S">Sarah Ebling</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.17714v1-abstract-short" style="display: inline;"> Sign language translation systems are complex and require many components. As a result, it is very hard to compare methods across publications. We present an open-source implementation of a text-to-gloss-to-pose-to-video pipeline approach, demonstrating conversion from German to Swiss German Sign Language, French to French Sign Language of Switzerland, and Italian to Italian Sign Language of Switz… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.17714v1-abstract-full').style.display = 'inline'; document.getElementById('2305.17714v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.17714v1-abstract-full" style="display: none;"> Sign language translation systems are complex and require many components. As a result, it is very hard to compare methods across publications. We present an open-source implementation of a text-to-gloss-to-pose-to-video pipeline approach, demonstrating conversion from German to Swiss German Sign Language, French to French Sign Language of Switzerland, and Italian to Italian Sign Language of Switzerland. We propose three different components for the text-to-gloss translation: a lemmatizer, a rule-based word reordering and dropping component, and a neural machine translation system. Gloss-to-pose conversion occurs using data from a lexicon for three different signed languages, with skeletal poses extracted from videos. To generate a sentence, the text-to-gloss system is first run, and the pose representations of the resulting signs are stitched together. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.17714v1-abstract-full').style.display = 'none'; document.getElementById('2305.17714v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.16740">arXiv:2305.16740</a> <span> [<a href="https://arxiv.org/pdf/2305.16740">pdf</a>, <a href="https://arxiv.org/format/2305.16740">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Conjunct Resolution in the Face of Verbal Omissions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Rassin%2C+R">Royi Rassin</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a>, <a href="/search/cs?searchtype=author&query=Tsarfaty%2C+R">Reut Tsarfaty</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.16740v1-abstract-short" style="display: inline;"> Verbal omissions are complex syntactic phenomena in VP coordination structures. They occur when verbs and (some of) their arguments are omitted from subsequent clauses after being explicitly stated in an initial clause. Recovering these omitted elements is necessary for accurate interpretation of the sentence, and while humans easily and intuitively fill in the missing information, state-of-the-ar… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.16740v1-abstract-full').style.display = 'inline'; document.getElementById('2305.16740v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.16740v1-abstract-full" style="display: none;"> Verbal omissions are complex syntactic phenomena in VP coordination structures. They occur when verbs and (some of) their arguments are omitted from subsequent clauses after being explicitly stated in an initial clause. Recovering these omitted elements is necessary for accurate interpretation of the sentence, and while humans easily and intuitively fill in the missing information, state-of-the-art models continue to struggle with this task. Previous work is limited to small-scale datasets, synthetic data creation methods, and to resolution methods in the dependency-graph level. In this work we propose a conjunct resolution task that operates directly on the text and makes use of a split-and-rephrase paradigm in order to recover the missing elements in the coordination structure. To this end, we first formulate a pragmatic framework of verbal omissions which describes the different types of omissions, and develop an automatic scalable collection method. Based on this method, we curate a large dataset, containing over 10K examples of naturally-occurring verbal omissions with crowd-sourced annotations of the resolved conjuncts. We train various neural baselines for this task, and show that while our best method obtains decent performance, it leaves ample space for improvement. We propose our dataset, metrics and models as a starting point for future research on this topic. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.16740v1-abstract-full').style.display = 'none'; document.getElementById('2305.16740v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.14785">arXiv:2305.14785</a> <span> [<a href="https://arxiv.org/pdf/2305.14785">pdf</a>, <a href="https://arxiv.org/format/2305.14785">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Simple Linguistic Inferences of Large Language Models (LLMs): Blind Spots and Blinds </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Basmov%2C+V">Victoria Basmov</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a>, <a href="/search/cs?searchtype=author&query=Tsarfaty%2C+R">Reut Tsarfaty</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.14785v2-abstract-short" style="display: inline;"> We evaluate LLMs' language understanding capacities on simple inference tasks that most humans find trivial. Specifically, we target (i) grammatically-specified entailments, (ii) premises with evidential adverbs of uncertainty, and (iii) monotonicity entailments. We design evaluation sets for these tasks and conduct experiments in both zero-shot and chain-of-thought setups, and with multiple promp… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.14785v2-abstract-full').style.display = 'inline'; document.getElementById('2305.14785v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.14785v2-abstract-full" style="display: none;"> We evaluate LLMs' language understanding capacities on simple inference tasks that most humans find trivial. Specifically, we target (i) grammatically-specified entailments, (ii) premises with evidential adverbs of uncertainty, and (iii) monotonicity entailments. We design evaluation sets for these tasks and conduct experiments in both zero-shot and chain-of-thought setups, and with multiple prompts and LLMs. The models exhibit moderate to low performance on these evaluation sets. Subsequent experiments show that embedding the premise in syntactic constructions that should preserve the entailment relations (presupposition triggers) or change them (non-factives), further confuses the models, causing them to either under-predict or over-predict certain entailment labels regardless of the true relation, and often disregarding the nature of the embedding context. Overall these results suggest that, despite LLMs' celebrated language understanding capacity, even the strongest models have blindspots with respect to certain types of entailments, and certain information-packaging structures act as ``blinds'' overshadowing the semantics of the embedded premise. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.14785v2-abstract-full').style.display = 'none'; document.getElementById('2305.14785v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.14763">arXiv:2305.14763</a> <span> [<a href="https://arxiv.org/pdf/2305.14763">pdf</a>, <a href="https://arxiv.org/format/2305.14763">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Clever Hans or Neural Theory of Mind? Stress Testing Social Reasoning in Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shapira%2C+N">Natalie Shapira</a>, <a href="/search/cs?searchtype=author&query=Levy%2C+M">Mosh Levy</a>, <a href="/search/cs?searchtype=author&query=Alavi%2C+S+H">Seyed Hossein Alavi</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+X">Xuhui Zhou</a>, <a href="/search/cs?searchtype=author&query=Choi%2C+Y">Yejin Choi</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a>, <a href="/search/cs?searchtype=author&query=Sap%2C+M">Maarten Sap</a>, <a href="/search/cs?searchtype=author&query=Shwartz%2C+V">Vered Shwartz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.14763v1-abstract-short" style="display: inline;"> The escalating debate on AI's capabilities warrants developing reliable metrics to assess machine "intelligence". Recently, many anecdotal examples were used to suggest that newer large language models (LLMs) like ChatGPT and GPT-4 exhibit Neural Theory-of-Mind (N-ToM); however, prior work reached conflicting conclusions regarding those abilities. We investigate the extent of LLMs' N-ToM through a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.14763v1-abstract-full').style.display = 'inline'; document.getElementById('2305.14763v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.14763v1-abstract-full" style="display: none;"> The escalating debate on AI's capabilities warrants developing reliable metrics to assess machine "intelligence". Recently, many anecdotal examples were used to suggest that newer large language models (LLMs) like ChatGPT and GPT-4 exhibit Neural Theory-of-Mind (N-ToM); however, prior work reached conflicting conclusions regarding those abilities. We investigate the extent of LLMs' N-ToM through an extensive evaluation on 6 tasks and find that while LLMs exhibit certain N-ToM abilities, this behavior is far from being robust. We further examine the factors impacting performance on N-ToM tasks and discover that LLMs struggle with adversarial examples, indicating reliance on shallow heuristics rather than robust ToM abilities. We caution against drawing conclusions from anecdotal examples, limited benchmark testing, and using human-designed psychological tests to evaluate models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.14763v1-abstract-full').style.display = 'none'; document.getElementById('2305.14763v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.12517">arXiv:2305.12517</a> <span> [<a href="https://arxiv.org/pdf/2305.12517">pdf</a>, <a href="https://arxiv.org/format/2305.12517">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Description-Based Text Similarity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ravfogel%2C+S">Shauli Ravfogel</a>, <a href="/search/cs?searchtype=author&query=Pyatkin%2C+V">Valentina Pyatkin</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+A+D">Amir DN Cohen</a>, <a href="/search/cs?searchtype=author&query=Manevich%2C+A">Avshalom Manevich</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.12517v5-abstract-short" style="display: inline;"> Identifying texts with a given semantics is central for many information seeking scenarios. Similarity search over vector embeddings appear to be central to this ability, yet the similarity reflected in current text embeddings is corpus-driven, and is inconsistent and sub-optimal for many use cases. What, then, is a good notion of similarity for effective retrieval of text? We identify the need… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.12517v5-abstract-full').style.display = 'inline'; document.getElementById('2305.12517v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.12517v5-abstract-full" style="display: none;"> Identifying texts with a given semantics is central for many information seeking scenarios. Similarity search over vector embeddings appear to be central to this ability, yet the similarity reflected in current text embeddings is corpus-driven, and is inconsistent and sub-optimal for many use cases. What, then, is a good notion of similarity for effective retrieval of text? We identify the need to search for texts based on abstract descriptions of their content, and the corresponding notion of \emph{description based similarity}. We demonstrate the inadequacy of current text embeddings and propose an alternative model that significantly improves when used in standard nearest neighbor search. The model is trained using positive and negative pairs sourced through prompting a LLM, demonstrating how data from LLMs can be used for creating new capabilities not immediately possible using the original model. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.12517v5-abstract-full').style.display = 'none'; document.getElementById('2305.12517v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in COLM 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.10160">arXiv:2305.10160</a> <span> [<a href="https://arxiv.org/pdf/2305.10160">pdf</a>, <a href="https://arxiv.org/format/2305.10160">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Stop Uploading Test Data in Plain Text: Practical Strategies for Mitigating Data Contamination by Evaluation Benchmarks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jacovi%2C+A">Alon Jacovi</a>, <a href="/search/cs?searchtype=author&query=Caciularu%2C+A">Avi Caciularu</a>, <a href="/search/cs?searchtype=author&query=Goldman%2C+O">Omer Goldman</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.10160v2-abstract-short" style="display: inline;"> Data contamination has become prevalent and challenging with the rise of models pretrained on large automatically-crawled corpora. For closed models, the training data becomes a trade secret, and even for open models, it is not trivial to detect contamination. Strategies such as leaderboards with hidden answers, or using test data which is guaranteed to be unseen, are expensive and become fragile… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.10160v2-abstract-full').style.display = 'inline'; document.getElementById('2305.10160v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.10160v2-abstract-full" style="display: none;"> Data contamination has become prevalent and challenging with the rise of models pretrained on large automatically-crawled corpora. For closed models, the training data becomes a trade secret, and even for open models, it is not trivial to detect contamination. Strategies such as leaderboards with hidden answers, or using test data which is guaranteed to be unseen, are expensive and become fragile with time. Assuming that all relevant actors value clean test data and will cooperate to mitigate data contamination, what can be done? We propose three strategies that can make a difference: (1) Test data made public should be encrypted with a public key and licensed to disallow derivative distribution; (2) demand training exclusion controls from closed API holders, and protect your test data by refusing to evaluate without them; (3) avoid data which appears with its solution on the internet, and release the web-page context of internet-derived data along with the data. These strategies are practical and can be effective in preventing data contamination. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.10160v2-abstract-full').style.display = 'none'; document.getElementById('2305.10160v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to EMNLP 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.02679">arXiv:2305.02679</a> <span> [<a href="https://arxiv.org/pdf/2305.02679">pdf</a>, <a href="https://arxiv.org/format/2305.02679">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Neighboring Words Affect Human Interpretation of Saliency Explanations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jacovi%2C+A">Alon Jacovi</a>, <a href="/search/cs?searchtype=author&query=Schuff%2C+H">Hendrik Schuff</a>, <a href="/search/cs?searchtype=author&query=Adel%2C+H">Heike Adel</a>, <a href="/search/cs?searchtype=author&query=Vu%2C+N+T">Ngoc Thang Vu</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.02679v2-abstract-short" style="display: inline;"> Word-level saliency explanations ("heat maps over words") are often used to communicate feature-attribution in text-based models. Recent studies found that superficial factors such as word length can distort human interpretation of the communicated saliency scores. We conduct a user study to investigate how the marking of a word's neighboring words affect the explainee's perception of the word's i… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.02679v2-abstract-full').style.display = 'inline'; document.getElementById('2305.02679v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.02679v2-abstract-full" style="display: none;"> Word-level saliency explanations ("heat maps over words") are often used to communicate feature-attribution in text-based models. Recent studies found that superficial factors such as word length can distort human interpretation of the communicated saliency scores. We conduct a user study to investigate how the marking of a word's neighboring words affect the explainee's perception of the word's importance in the context of a saliency explanation. We find that neighboring words have significant effects on the word's importance rating. Concretely, we identify that the influence changes based on neighboring direction (left vs. right) and a-priori linguistic and computational measures of phrases and collocations (vs. unrelated neighboring words). Our results question whether text-based saliency explanations should be continued to be communicated at word level, and inform future research on alternative saliency explanation methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.02679v2-abstract-full').style.display = 'none'; document.getElementById('2305.02679v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to Findings of ACL 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.02633">arXiv:2305.02633</a> <span> [<a href="https://arxiv.org/pdf/2305.02633">pdf</a>, <a href="https://arxiv.org/format/2305.02633">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Conformal Nucleus Sampling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ravfogel%2C+S">Shauli Ravfogel</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a>, <a href="/search/cs?searchtype=author&query=Goldberger%2C+J">Jacob Goldberger</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.02633v1-abstract-short" style="display: inline;"> Language models generate text based on successively sampling the next word. A decoding procedure based on nucleus (top-$p$) sampling chooses from the smallest possible set of words whose cumulative probability exceeds the probability $p$. In this work, we assess whether a top-$p$ set is indeed aligned with its probabilistic meaning in various linguistic contexts. We employ conformal prediction, a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.02633v1-abstract-full').style.display = 'inline'; document.getElementById('2305.02633v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.02633v1-abstract-full" style="display: none;"> Language models generate text based on successively sampling the next word. A decoding procedure based on nucleus (top-$p$) sampling chooses from the smallest possible set of words whose cumulative probability exceeds the probability $p$. In this work, we assess whether a top-$p$ set is indeed aligned with its probabilistic meaning in various linguistic contexts. We employ conformal prediction, a calibration procedure that focuses on the construction of minimal prediction sets according to a desired confidence level, to calibrate the parameter $p$ as a function of the entropy of the next word distribution. We find that OPT models are overconfident, and that calibration shows a moderate inverse scaling with model size. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.02633v1-abstract-full').style.display = 'none'; document.getElementById('2305.02633v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted as a short paper in Findings of ACL23</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.14836">arXiv:2304.14836</a> <span> [<a href="https://arxiv.org/pdf/2304.14836">pdf</a>, <a href="https://arxiv.org/format/2304.14836">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Training Large Scale Polynomial CNNs for E2E Inference over Homomorphic Encryption </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Baruch%2C+M">Moran Baruch</a>, <a href="/search/cs?searchtype=author&query=Drucker%2C+N">Nir Drucker</a>, <a href="/search/cs?searchtype=author&query=Ezov%2C+G">Gilad Ezov</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a>, <a href="/search/cs?searchtype=author&query=Kushnir%2C+E">Eyal Kushnir</a>, <a href="/search/cs?searchtype=author&query=Lerner%2C+J">Jenny Lerner</a>, <a href="/search/cs?searchtype=author&query=Soceanu%2C+O">Omri Soceanu</a>, <a href="/search/cs?searchtype=author&query=Zimerman%2C+I">Itamar Zimerman</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.14836v2-abstract-short" style="display: inline;"> Training large-scale CNNs that during inference can be run under Homomorphic Encryption (HE) is challenging due to the need to use only polynomial operations. This limits HE-based solutions adoption. We address this challenge and pioneer in providing a novel training method for large polynomial CNNs such as ResNet-152 and ConvNeXt models, and achieve promising accuracy on encrypted samples on larg… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.14836v2-abstract-full').style.display = 'inline'; document.getElementById('2304.14836v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.14836v2-abstract-full" style="display: none;"> Training large-scale CNNs that during inference can be run under Homomorphic Encryption (HE) is challenging due to the need to use only polynomial operations. This limits HE-based solutions adoption. We address this challenge and pioneer in providing a novel training method for large polynomial CNNs such as ResNet-152 and ConvNeXt models, and achieve promising accuracy on encrypted samples on large-scale dataset such as ImageNet. Additionally, we provide optimization insights regarding activation functions and skip-connection latency impacts, enhancing HE-based evaluation efficiency. Finally, to demonstrate the robustness of our method, we provide a polynomial adaptation of the CLIP model for secure zero-shot prediction, unlocking unprecedented capabilities at the intersection of HE and transfer learning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.14836v2-abstract-full').style.display = 'none'; document.getElementById('2304.14836v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.11754">arXiv:2304.11754</a> <span> [<a href="https://arxiv.org/pdf/2304.11754">pdf</a>, <a href="https://arxiv.org/format/2304.11754">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Silent Abandonment in Contact Centers: Estimating Customer Patience from Uncertain Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Castellanos%2C+A">Antonio Castellanos</a>, <a href="/search/cs?searchtype=author&query=Yom-Tov%2C+G+B">Galit B. Yom-Tov</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yair Goldberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.11754v2-abstract-short" style="display: inline;"> In the quest to improve services, companies offer customers the opportunity to interact with agents through contact centers, where the communication is mainly text-based. This has become one of the favorite channels of communication with companies in recent years. However, contact centers face operational challenges, since the measurement of common proxies for customer experience, such as knowledg… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.11754v2-abstract-full').style.display = 'inline'; document.getElementById('2304.11754v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.11754v2-abstract-full" style="display: none;"> In the quest to improve services, companies offer customers the opportunity to interact with agents through contact centers, where the communication is mainly text-based. This has become one of the favorite channels of communication with companies in recent years. However, contact centers face operational challenges, since the measurement of common proxies for customer experience, such as knowledge of whether customers have abandoned the queue and their willingness to wait for service (patience), are subject to information uncertainty. We focus this research on the impact of a main source of such uncertainty: silent abandonment by customers. These customers leave the system while waiting for a reply to their inquiry, but give no indication of doing so, such as closing the mobile app of the interaction. As a result, the system is unaware that they have left and waste agent time and capacity until this fact is realized. In this paper, we show that 30%-67% of the abandoning customers abandon the system silently, and that such customer behavior reduces system efficiency by 5%-15%. To do so, we develop methodologies to identify silent-abandonment customers in two types of contact centers: chat and messaging systems. We first use text analysis and an SVM model to estimate the actual abandonment level. We then use a parametric estimator and develop an expectation-maximization algorithm to estimate customer patience accurately, as customer patience is an important parameter for fitting queueing models to the data. We show how accounting for silent abandonment in a queueing model improves dramatically the estimation accuracy of key measures of performance. Finally, we suggest strategies to operationally cope with the phenomenon of silent abandonment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.11754v2-abstract-full').style.display = 'none'; document.getElementById('2304.11754v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">V2</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.10527">arXiv:2303.10527</a> <span> [<a href="https://arxiv.org/pdf/2303.10527">pdf</a>, <a href="https://arxiv.org/format/2303.10527">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Two Kinds of Recall </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.10527v1-abstract-short" style="display: inline;"> It is an established assumption that pattern-based models are good at precision, while learning based models are better at recall. But is that really the case? I argue that there are two kinds of recall: d-recall, reflecting diversity, and e-recall, reflecting exhaustiveness. I demonstrate through experiments that while neural methods are indeed significantly better at d-recall, it is sometimes th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.10527v1-abstract-full').style.display = 'inline'; document.getElementById('2303.10527v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.10527v1-abstract-full" style="display: none;"> It is an established assumption that pattern-based models are good at precision, while learning based models are better at recall. But is that really the case? I argue that there are two kinds of recall: d-recall, reflecting diversity, and e-recall, reflecting exhaustiveness. I demonstrate through experiments that while neural methods are indeed significantly better at d-recall, it is sometimes the case that pattern-based methods are still substantially better at e-recall. Ideal methods should aim for both kinds, and this ideal should in turn be reflected in our evaluations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.10527v1-abstract-full').style.display = 'none'; document.getElementById('2303.10527v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.03745">arXiv:2303.03745</a> <span> [<a href="https://arxiv.org/pdf/2303.03745">pdf</a>, <a href="https://arxiv.org/format/2303.03745">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> At Your Fingertips: Extracting Piano Fingering Instructions from Videos </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Moryossef%2C+A">Amit Moryossef</a>, <a href="/search/cs?searchtype=author&query=Elazar%2C+Y">Yanai Elazar</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.03745v1-abstract-short" style="display: inline;"> Piano fingering -- knowing which finger to use to play each note in a musical piece, is a hard and important skill to master when learning to play the piano. While some sheet music is available with expert-annotated fingering information, most pieces lack this information, and people often resort to learning the fingering from demonstrations in online videos. We consider the AI task of automating… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.03745v1-abstract-full').style.display = 'inline'; document.getElementById('2303.03745v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.03745v1-abstract-full" style="display: none;"> Piano fingering -- knowing which finger to use to play each note in a musical piece, is a hard and important skill to master when learning to play the piano. While some sheet music is available with expert-annotated fingering information, most pieces lack this information, and people often resort to learning the fingering from demonstrations in online videos. We consider the AI task of automating the extraction of fingering information from videos. This is a non-trivial task as fingers are often occluded by other fingers, and it is often not clear from the video which of the keys were pressed, requiring the synchronization of hand position information and knowledge about the notes that were played. We show how to perform this task with high-accuracy using a combination of deep-learning modules, including a GAN-based approach for fine-tuning on out-of-domain data. We extract the fingering information with an f1 score of 97\%. We run the resulting system on 90 videos, resulting in high-quality piano fingering information of 150K notes, the largest available dataset of piano-fingering to date. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.03745v1-abstract-full').style.display = 'none'; document.getElementById('2303.03745v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, paper from 2019</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.12673">arXiv:2210.12673</a> <span> [<a href="https://arxiv.org/pdf/2210.12673">pdf</a>, <a href="https://arxiv.org/format/2210.12673">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Lexical Generalization Improves with Larger Models and Longer Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bandel%2C+E">Elron Bandel</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a>, <a href="/search/cs?searchtype=author&query=Elazar%2C+Y">Yanai Elazar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.12673v2-abstract-short" style="display: inline;"> While fine-tuned language models perform well on many tasks, they were also shown to rely on superficial surface features such as lexical overlap. Excessive utilization of such heuristics can lead to failure on challenging inputs. We analyze the use of lexical overlap heuristics in natural language inference, paraphrase detection, and reading comprehension (using a novel contrastive dataset), and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.12673v2-abstract-full').style.display = 'inline'; document.getElementById('2210.12673v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.12673v2-abstract-full" style="display: none;"> While fine-tuned language models perform well on many tasks, they were also shown to rely on superficial surface features such as lexical overlap. Excessive utilization of such heuristics can lead to failure on challenging inputs. We analyze the use of lexical overlap heuristics in natural language inference, paraphrase detection, and reading comprehension (using a novel contrastive dataset), and find that larger models are much less susceptible to adopting lexical overlap heuristics. We also find that longer training leads models to abandon lexical overlap heuristics. Finally, we provide evidence that the disparity between models size has its source in the pre-trained model <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.12673v2-abstract-full').style.display = 'none'; document.getElementById('2210.12673v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to EMNLP 2022 as Findings Paper, Presented at BlackboxNLP 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.10606">arXiv:2210.10606</a> <span> [<a href="https://arxiv.org/pdf/2210.10606">pdf</a>, <a href="https://arxiv.org/format/2210.10606">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> DALLE-2 is Seeing Double: Flaws in Word-to-Concept Mapping in Text2Image Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Rassin%2C+R">Royi Rassin</a>, <a href="/search/cs?searchtype=author&query=Ravfogel%2C+S">Shauli Ravfogel</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.10606v1-abstract-short" style="display: inline;"> We study the way DALLE-2 maps symbols (words) in the prompt to their references (entities or properties of entities in the generated image). We show that in stark contrast to the way human process language, DALLE-2 does not follow the constraint that each word has a single role in the interpretation, and sometimes re-use the same symbol for different purposes. We collect a set of stimuli that refl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.10606v1-abstract-full').style.display = 'inline'; document.getElementById('2210.10606v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.10606v1-abstract-full" style="display: none;"> We study the way DALLE-2 maps symbols (words) in the prompt to their references (entities or properties of entities in the generated image). We show that in stark contrast to the way human process language, DALLE-2 does not follow the constraint that each word has a single role in the interpretation, and sometimes re-use the same symbol for different purposes. We collect a set of stimuli that reflect the phenomenon: we show that DALLE-2 depicts both senses of nouns with multiple senses at once; and that a given word can modify the properties of two distinct entities in the image, or can be depicted as one object and also modify the properties of another object, creating a semantic leakage of properties between entities. Taken together, our study highlights the differences between DALLE-2 and human language processing and opens an avenue for future study on the inductive biases of text-to-image models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.10606v1-abstract-full').style.display = 'none'; document.getElementById('2210.10606v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, BlackboxNLP @ EMNLP 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.10012">arXiv:2210.10012</a> <span> [<a href="https://arxiv.org/pdf/2210.10012">pdf</a>, <a href="https://arxiv.org/format/2210.10012">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Log-linear Guardedness and its Implications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ravfogel%2C+S">Shauli Ravfogel</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a>, <a href="/search/cs?searchtype=author&query=Cotterell%2C+R">Ryan Cotterell</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.10012v5-abstract-short" style="display: inline;"> Methods for erasing human-interpretable concepts from neural representations that assume linearity have been found to be tractable and useful. However, the impact of this removal on the behavior of downstream classifiers trained on the modified representations is not fully understood. In this work, we formally define the notion of log-linear guardedness as the inability of an adversary to predict… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.10012v5-abstract-full').style.display = 'inline'; document.getElementById('2210.10012v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.10012v5-abstract-full" style="display: none;"> Methods for erasing human-interpretable concepts from neural representations that assume linearity have been found to be tractable and useful. However, the impact of this removal on the behavior of downstream classifiers trained on the modified representations is not fully understood. In this work, we formally define the notion of log-linear guardedness as the inability of an adversary to predict the concept directly from the representation, and study its implications. We show that, in the binary case, under certain assumptions, a downstream log-linear model cannot recover the erased concept. However, we demonstrate that a multiclass log-linear model \emph{can} be constructed that indirectly recovers the concept in some cases, pointing to the inherent limitations of log-linear guardedness as a downstream bias mitigation technique. These findings shed light on the theoretical limitations of linear erasure methods and highlight the need for further research on the connections between intrinsic and extrinsic bias in neural models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.10012v5-abstract-full').style.display = 'none'; document.getElementById('2210.10012v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted as a long paper in ACL 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.06246">arXiv:2210.06246</a> <span> [<a href="https://arxiv.org/pdf/2210.06246">pdf</a>, <a href="https://arxiv.org/format/2210.06246">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> CIKQA: Learning Commonsense Inference with a Unified Knowledge-in-the-loop QA Paradigm </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Hongming Zhang</a>, <a href="/search/cs?searchtype=author&query=Huo%2C+Y">Yintong Huo</a>, <a href="/search/cs?searchtype=author&query=Elazar%2C+Y">Yanai Elazar</a>, <a href="/search/cs?searchtype=author&query=Song%2C+Y">Yangqiu Song</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a>, <a href="/search/cs?searchtype=author&query=Roth%2C+D">Dan Roth</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.06246v1-abstract-short" style="display: inline;"> Recently, the community has achieved substantial progress on many commonsense reasoning benchmarks. However, it is still unclear what is learned from the training process: the knowledge, inference capability, or both? We argue that due to the large scale of commonsense knowledge, it is infeasible to annotate a large enough training set for each task to cover all commonsense for learning. Thus we s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.06246v1-abstract-full').style.display = 'inline'; document.getElementById('2210.06246v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.06246v1-abstract-full" style="display: none;"> Recently, the community has achieved substantial progress on many commonsense reasoning benchmarks. However, it is still unclear what is learned from the training process: the knowledge, inference capability, or both? We argue that due to the large scale of commonsense knowledge, it is infeasible to annotate a large enough training set for each task to cover all commonsense for learning. Thus we should separate the commonsense knowledge acquisition and inference over commonsense knowledge as two separate tasks. In this work, we focus on investigating models' commonsense inference capabilities from two perspectives: (1) Whether models can know if the knowledge they have is enough to solve the task; (2) Whether models can develop commonsense inference capabilities that generalize across commonsense tasks. We first align commonsense tasks with relevant knowledge from commonsense knowledge bases and ask humans to annotate whether the knowledge is enough or not. Then, we convert different commonsense tasks into a unified question answering format to evaluate models' generalization capabilities. We name the benchmark as Commonsense Inference with Knowledge-in-the-loop Question Answering (CIKQA). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.06246v1-abstract-full').style.display = 'none'; document.getElementById('2210.06246v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.03588">arXiv:2210.03588</a> <span> [<a href="https://arxiv.org/pdf/2210.03588">pdf</a>, <a href="https://arxiv.org/format/2210.03588">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Understanding Transformer Memorization Recall Through Idioms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Haviv%2C+A">Adi Haviv</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+I">Ido Cohen</a>, <a href="/search/cs?searchtype=author&query=Gidron%2C+J">Jacob Gidron</a>, <a href="/search/cs?searchtype=author&query=Schuster%2C+R">Roei Schuster</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a>, <a href="/search/cs?searchtype=author&query=Geva%2C+M">Mor Geva</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.03588v3-abstract-short" style="display: inline;"> To produce accurate predictions, language models (LMs) must balance between generalization and memorization. Yet, little is known about the mechanism by which transformer LMs employ their memorization capacity. When does a model decide to output a memorized phrase, and how is this phrase then retrieved from memory? In this work, we offer the first methodological framework for probing and character… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.03588v3-abstract-full').style.display = 'inline'; document.getElementById('2210.03588v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.03588v3-abstract-full" style="display: none;"> To produce accurate predictions, language models (LMs) must balance between generalization and memorization. Yet, little is known about the mechanism by which transformer LMs employ their memorization capacity. When does a model decide to output a memorized phrase, and how is this phrase then retrieved from memory? In this work, we offer the first methodological framework for probing and characterizing recall of memorized sequences in transformer LMs. First, we lay out criteria for detecting model inputs that trigger memory recall, and propose idioms as inputs that typically fulfill these criteria. Next, we construct a dataset of English idioms and use it to compare model behavior on memorized vs. non-memorized inputs. Specifically, we analyze the internal prediction construction process by interpreting the model's hidden representations as a gradual refinement of the output probability distribution. We find that across different model sizes and architectures, memorized predictions are a two-step process: early layers promote the predicted token to the top of the output distribution, and upper layers increase model confidence. This suggests that memorized information is stored and retrieved in the early layers of the network. Last, we demonstrate the utility of our methodology beyond idioms in memorized factual statements. Overall, our work makes a first step towards understanding memory recall, and provides a methodological basis for future studies of transformer memorization. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.03588v3-abstract-full').style.display = 'none'; document.getElementById('2210.03588v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.04280">arXiv:2209.04280</a> <span> [<a href="https://arxiv.org/pdf/2209.04280">pdf</a>, <a href="https://arxiv.org/format/2209.04280">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> F-coref: Fast, Accurate and Easy to Use Coreference Resolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Otmazgin%2C+S">Shon Otmazgin</a>, <a href="/search/cs?searchtype=author&query=Cattan%2C+A">Arie Cattan</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.04280v4-abstract-short" style="display: inline;"> We introduce fastcoref, a python package for fast, accurate, and easy-to-use English coreference resolution. The package is pip-installable, and allows two modes: an accurate mode based on the LingMess architecture, providing state-of-the-art coreference accuracy, and a substantially faster model, F-coref, which is the focus of this work. F-coref allows to process 2.8K OntoNotes documents in 25 se… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.04280v4-abstract-full').style.display = 'inline'; document.getElementById('2209.04280v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.04280v4-abstract-full" style="display: none;"> We introduce fastcoref, a python package for fast, accurate, and easy-to-use English coreference resolution. The package is pip-installable, and allows two modes: an accurate mode based on the LingMess architecture, providing state-of-the-art coreference accuracy, and a substantially faster model, F-coref, which is the focus of this work. F-coref allows to process 2.8K OntoNotes documents in 25 seconds on a V100 GPU (compared to 6 minutes for the LingMess model, and to 12 minutes of the popular AllenNLP coreference model) with only a modest drop in accuracy. The fast speed is achieved through a combination of distillation of a compact model from the LingMess model, and an efficient batching implementation using a technique we call leftover batching. Our code is available at https://github.com/shon-otmazgin/fastcoref <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.04280v4-abstract-full').style.display = 'none'; document.getElementById('2209.04280v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">AACL 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.14251">arXiv:2207.14251</a> <span> [<a href="https://arxiv.org/pdf/2207.14251">pdf</a>, <a href="https://arxiv.org/format/2207.14251">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Measuring Causal Effects of Data Statistics on Language Model's `Factual' Predictions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Elazar%2C+Y">Yanai Elazar</a>, <a href="/search/cs?searchtype=author&query=Kassner%2C+N">Nora Kassner</a>, <a href="/search/cs?searchtype=author&query=Ravfogel%2C+S">Shauli Ravfogel</a>, <a href="/search/cs?searchtype=author&query=Feder%2C+A">Amir Feder</a>, <a href="/search/cs?searchtype=author&query=Ravichander%2C+A">Abhilasha Ravichander</a>, <a href="/search/cs?searchtype=author&query=Mosbach%2C+M">Marius Mosbach</a>, <a href="/search/cs?searchtype=author&query=Belinkov%2C+Y">Yonatan Belinkov</a>, <a href="/search/cs?searchtype=author&query=Sch%C3%BCtze%2C+H">Hinrich Sch眉tze</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.14251v2-abstract-short" style="display: inline;"> Large amounts of training data are one of the major reasons for the high performance of state-of-the-art NLP models. But what exactly in the training data causes a model to make a certain prediction? We seek to answer this question by providing a language for describing how training data influences predictions, through a causal framework. Importantly, our framework bypasses the need to retrain exp… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.14251v2-abstract-full').style.display = 'inline'; document.getElementById('2207.14251v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.14251v2-abstract-full" style="display: none;"> Large amounts of training data are one of the major reasons for the high performance of state-of-the-art NLP models. But what exactly in the training data causes a model to make a certain prediction? We seek to answer this question by providing a language for describing how training data influences predictions, through a causal framework. Importantly, our framework bypasses the need to retrain expensive models and allows us to estimate causal effects based on observational data alone. Addressing the problem of extracting factual knowledge from pretrained language models (PLMs), we focus on simple data statistics such as co-occurrence counts and show that these statistics do influence the predictions of PLMs, suggesting that such models rely on shallow heuristics. Our causal framework and our results demonstrate the importance of studying datasets and the benefits of causality for understanding NLP models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.14251v2-abstract-full').style.display = 'none'; document.getElementById('2207.14251v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">We received a criticism regarding the validity of the causal formulation in this paper. We will address them in an upcoming version</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.12926">arXiv:2206.12926</a> <span> [<a href="https://arxiv.org/pdf/2206.12926">pdf</a>, <a href="https://arxiv.org/format/2206.12926">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Rivendell: Project-Based Academic Search Engine </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lazebnik%2C+T">Teddy Lazebnik</a>, <a href="/search/cs?searchtype=author&query=Weitman%2C+H">Hanna Weitman</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a>, <a href="/search/cs?searchtype=author&query=Kaminka%2C+G+A">Gal A. Kaminka</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.12926v1-abstract-short" style="display: inline;"> Finding relevant research literature in online databases is a familiar challenge to all researchers. General search approaches trying to tackle this challenge fall into two groups: one-time search and life-time search. We observe that both approaches ignore unique attributes of the research domain and are affected by concept drift. We posit that in searching for research papers, a combination of a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.12926v1-abstract-full').style.display = 'inline'; document.getElementById('2206.12926v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.12926v1-abstract-full" style="display: none;"> Finding relevant research literature in online databases is a familiar challenge to all researchers. General search approaches trying to tackle this challenge fall into two groups: one-time search and life-time search. We observe that both approaches ignore unique attributes of the research domain and are affected by concept drift. We posit that in searching for research papers, a combination of a life-time search engine with an explicitly-provided context (project) provides a solution to the concept drift problem. We developed and deployed a project-based meta-search engine for research papers called Rivendell. Using Rivendell, we conducted experiments with 199 subjects, comparing project-based search performance to one-time and life-time search engines, revealing an improvement of up to 12.8 percent in project-based search compared to life-time search. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.12926v1-abstract-full').style.display = 'none'; document.getElementById('2206.12926v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.12644">arXiv:2205.12644</a> <span> [<a href="https://arxiv.org/pdf/2205.12644">pdf</a>, <a href="https://arxiv.org/format/2205.12644">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> LingMess: Linguistically Informed Multi Expert Scorers for Coreference Resolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Otmazgin%2C+S">Shon Otmazgin</a>, <a href="/search/cs?searchtype=author&query=Cattan%2C+A">Arie Cattan</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.12644v3-abstract-short" style="display: inline;"> While coreference resolution typically involves various linguistic challenges, recent models are based on a single pairwise scorer for all types of pairs. We present LingMess, a new coreference model that defines different categories of coreference cases and optimize multiple pairwise scorers, where each scorer learns a specific set of linguistic challenges. Our model substantially improves pairwi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.12644v3-abstract-full').style.display = 'inline'; document.getElementById('2205.12644v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.12644v3-abstract-full" style="display: none;"> While coreference resolution typically involves various linguistic challenges, recent models are based on a single pairwise scorer for all types of pairs. We present LingMess, a new coreference model that defines different categories of coreference cases and optimize multiple pairwise scorers, where each scorer learns a specific set of linguistic challenges. Our model substantially improves pairwise scores for most categories and outperforms cluster-level performance on Ontonotes and 5 additional datasets. Our model is available in https://github.com/shon-otmazgin/lingmess-coref <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.12644v3-abstract-full').style.display = 'none'; document.getElementById('2205.12644v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EACL 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.02289">arXiv:2205.02289</a> <span> [<a href="https://arxiv.org/pdf/2205.02289">pdf</a>, <a href="https://arxiv.org/format/2205.02289">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> A Dataset for N-ary Relation Extraction of Drug Combinations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tiktinsky%2C+A">Aryeh Tiktinsky</a>, <a href="/search/cs?searchtype=author&query=Viswanathan%2C+V">Vijay Viswanathan</a>, <a href="/search/cs?searchtype=author&query=Niezni%2C+D">Danna Niezni</a>, <a href="/search/cs?searchtype=author&query=Azagury%2C+D+M">Dana Meron Azagury</a>, <a href="/search/cs?searchtype=author&query=Shamay%2C+Y">Yosi Shamay</a>, <a href="/search/cs?searchtype=author&query=Taub-Tabib%2C+H">Hillel Taub-Tabib</a>, <a href="/search/cs?searchtype=author&query=Hope%2C+T">Tom Hope</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.02289v1-abstract-short" style="display: inline;"> Combination therapies have become the standard of care for diseases such as cancer, tuberculosis, malaria and HIV. However, the combinatorial set of available multi-drug treatments creates a challenge in identifying effective combination therapies available in a situation. To assist medical professionals in identifying beneficial drug-combinations, we construct an expert-annotated dataset for extr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.02289v1-abstract-full').style.display = 'inline'; document.getElementById('2205.02289v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.02289v1-abstract-full" style="display: none;"> Combination therapies have become the standard of care for diseases such as cancer, tuberculosis, malaria and HIV. However, the combinatorial set of available multi-drug treatments creates a challenge in identifying effective combination therapies available in a situation. To assist medical professionals in identifying beneficial drug-combinations, we construct an expert-annotated dataset for extracting information about the efficacy of drug combinations from the scientific literature. Beyond its practical utility, the dataset also presents a unique NLP challenge, as the first relation extraction dataset consisting of variable-length relations. Furthermore, the relations in this dataset predominantly require language understanding beyond the sentence level, adding to the challenge of this task. We provide a promising baseline model and identify clear areas for further improvement. We release our dataset, code, and baseline models publicly to encourage the NLP community to participate in this task. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.02289v1-abstract-full').style.display = 'none'; document.getElementById('2205.02289v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in NAACL 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.12130">arXiv:2204.12130</a> <span> [<a href="https://arxiv.org/pdf/2204.12130">pdf</a>, <a href="https://arxiv.org/format/2204.12130">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> LM-Debugger: An Interactive Tool for Inspection and Intervention in Transformer-Based Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Geva%2C+M">Mor Geva</a>, <a href="/search/cs?searchtype=author&query=Caciularu%2C+A">Avi Caciularu</a>, <a href="/search/cs?searchtype=author&query=Dar%2C+G">Guy Dar</a>, <a href="/search/cs?searchtype=author&query=Roit%2C+P">Paul Roit</a>, <a href="/search/cs?searchtype=author&query=Sadde%2C+S">Shoval Sadde</a>, <a href="/search/cs?searchtype=author&query=Shlain%2C+M">Micah Shlain</a>, <a href="/search/cs?searchtype=author&query=Tamir%2C+B">Bar Tamir</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.12130v2-abstract-short" style="display: inline;"> The opaque nature and unexplained behavior of transformer-based language models (LMs) have spurred a wide interest in interpreting their predictions. However, current interpretation methods mostly focus on probing models from outside, executing behavioral tests, and analyzing salience input features, while the internal prediction construction process is largely not understood. In this work, we int… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.12130v2-abstract-full').style.display = 'inline'; document.getElementById('2204.12130v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.12130v2-abstract-full" style="display: none;"> The opaque nature and unexplained behavior of transformer-based language models (LMs) have spurred a wide interest in interpreting their predictions. However, current interpretation methods mostly focus on probing models from outside, executing behavioral tests, and analyzing salience input features, while the internal prediction construction process is largely not understood. In this work, we introduce LM-Debugger, an interactive debugger tool for transformer-based LMs, which provides a fine-grained interpretation of the model's internal prediction process, as well as a powerful framework for intervening in LM behavior. For its backbone, LM-Debugger relies on a recent method that interprets the inner token representations and their updates by the feed-forward layers in the vocabulary space. We demonstrate the utility of LM-Debugger for single-prediction debugging, by inspecting the internal disambiguation process done by GPT2. Moreover, we show how easily LM-Debugger allows to shift model behavior in a direction of the user's choice, by identifying a few vectors in the network and inducing effective interventions to the prediction process. We release LM-Debugger as an open-source tool and a demo over GPT2 models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.12130v2-abstract-full').style.display = 'none'; document.getElementById('2204.12130v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP 2022 System Demonstrations</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.09168">arXiv:2204.09168</a> <span> [<a href="https://arxiv.org/pdf/2204.09168">pdf</a>, <a href="https://arxiv.org/format/2204.09168">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Analyzing Gender Representation in Multilingual Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gonen%2C+H">Hila Gonen</a>, <a href="/search/cs?searchtype=author&query=Ravfogel%2C+S">Shauli Ravfogel</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.09168v2-abstract-short" style="display: inline;"> Multilingual language models were shown to allow for nontrivial transfer across scripts and languages. In this work, we study the structure of the internal representations that enable this transfer. We focus on the representation of gender distinctions as a practical case study, and examine the extent to which the gender concept is encoded in shared subspaces across different languages. Our analys… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.09168v2-abstract-full').style.display = 'inline'; document.getElementById('2204.09168v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.09168v2-abstract-full" style="display: none;"> Multilingual language models were shown to allow for nontrivial transfer across scripts and languages. In this work, we study the structure of the internal representations that enable this transfer. We focus on the representation of gender distinctions as a practical case study, and examine the extent to which the gender concept is encoded in shared subspaces across different languages. Our analysis shows that gender representations consist of several prominent components that are shared across languages, alongside language-specific components. The existence of language-independent and language-specific components provides an explanation for an intriguing empirical observation we make: while gender classification transfers well across languages, interventions for gender removal, trained on a single language, do not transfer easily to others. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.09168v2-abstract-full').style.display = 'none'; document.getElementById('2204.09168v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published at RepL4NLP 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.14680">arXiv:2203.14680</a> <span> [<a href="https://arxiv.org/pdf/2203.14680">pdf</a>, <a href="https://arxiv.org/format/2203.14680">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Transformer Feed-Forward Layers Build Predictions by Promoting Concepts in the Vocabulary Space </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Geva%2C+M">Mor Geva</a>, <a href="/search/cs?searchtype=author&query=Caciularu%2C+A">Avi Caciularu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+K+R">Kevin Ro Wang</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.14680v3-abstract-short" style="display: inline;"> Transformer-based language models (LMs) are at the core of modern NLP, but their internal prediction construction process is opaque and largely not understood. In this work, we make a substantial step towards unveiling this underlying prediction process, by reverse-engineering the operation of the feed-forward network (FFN) layers, one of the building blocks of transformer models. We view the toke… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.14680v3-abstract-full').style.display = 'inline'; document.getElementById('2203.14680v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.14680v3-abstract-full" style="display: none;"> Transformer-based language models (LMs) are at the core of modern NLP, but their internal prediction construction process is opaque and largely not understood. In this work, we make a substantial step towards unveiling this underlying prediction process, by reverse-engineering the operation of the feed-forward network (FFN) layers, one of the building blocks of transformer models. We view the token representation as a changing distribution over the vocabulary, and the output from each FFN layer as an additive update to that distribution. Then, we analyze the FFN updates in the vocabulary space, showing that each update can be decomposed to sub-updates corresponding to single FFN parameter vectors, each promoting concepts that are often human-interpretable. We then leverage these findings for controlling LM predictions, where we reduce the toxicity of GPT2 by almost 50%, and for improving computation efficiency with a simple early exit rule, saving 20% of computation on average. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.14680v3-abstract-full').style.display = 'none'; document.getElementById('2203.14680v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2201.12191">arXiv:2201.12191</a> <span> [<a href="https://arxiv.org/pdf/2201.12191">pdf</a>, <a href="https://arxiv.org/format/2201.12191">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Kernelized Concept Erasure </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ravfogel%2C+S">Shauli Ravfogel</a>, <a href="/search/cs?searchtype=author&query=Vargas%2C+F">Francisco Vargas</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a>, <a href="/search/cs?searchtype=author&query=Cotterell%2C+R">Ryan Cotterell</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2201.12191v6-abstract-short" style="display: inline;"> The representation space of neural models for textual data emerges in an unsupervised manner during training. Understanding how those representations encode human-interpretable concepts is a fundamental problem. One prominent approach for the identification of concepts in neural representations is searching for a linear subspace whose erasure prevents the prediction of the concept from the represe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.12191v6-abstract-full').style.display = 'inline'; document.getElementById('2201.12191v6-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2201.12191v6-abstract-full" style="display: none;"> The representation space of neural models for textual data emerges in an unsupervised manner during training. Understanding how those representations encode human-interpretable concepts is a fundamental problem. One prominent approach for the identification of concepts in neural representations is searching for a linear subspace whose erasure prevents the prediction of the concept from the representations. However, while many linear erasure algorithms are tractable and interpretable, neural networks do not necessarily represent concepts in a linear manner. To identify non-linearly encoded concepts, we propose a kernelization of a linear minimax game for concept erasure. We demonstrate that it is possible to prevent specific non-linear adversaries from predicting the concept. However, the protection does not transfer to different nonlinear adversaries. Therefore, exhaustively erasing a non-linearly encoded concept remains an open problem. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.12191v6-abstract-full').style.display = 'none'; document.getElementById('2201.12191v6-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 January, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted as a long paper in EMNLP22</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2201.12091">arXiv:2201.12091</a> <span> [<a href="https://arxiv.org/pdf/2201.12091">pdf</a>, <a href="https://arxiv.org/format/2201.12091">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Linear Adversarial Concept Erasure </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ravfogel%2C+S">Shauli Ravfogel</a>, <a href="/search/cs?searchtype=author&query=Twiton%2C+M">Michael Twiton</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a>, <a href="/search/cs?searchtype=author&query=Cotterell%2C+R">Ryan Cotterell</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2201.12091v8-abstract-short" style="display: inline;"> Modern neural models trained on textual data rely on pre-trained representations that emerge without direct supervision. As these representations are increasingly being used in real-world applications, the inability to \emph{control} their content becomes an increasingly important problem. We formulate the problem of identifying and erasing a linear subspace that corresponds to a given concept, in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.12091v8-abstract-full').style.display = 'inline'; document.getElementById('2201.12091v8-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2201.12091v8-abstract-full" style="display: none;"> Modern neural models trained on textual data rely on pre-trained representations that emerge without direct supervision. As these representations are increasingly being used in real-world applications, the inability to \emph{control} their content becomes an increasingly important problem. We formulate the problem of identifying and erasing a linear subspace that corresponds to a given concept, in order to prevent linear predictors from recovering the concept. We model this problem as a constrained, linear maximin game, and show that existing solutions are generally not optimal for this task. We derive a closed-form solution for certain objectives, and propose a convex relaxation, \method, that works well for others. When evaluated in the context of binary gender removal, the method recovers a low-dimensional subspace whose removal mitigates bias by intrinsic and extrinsic evaluation. We show that the method is highly expressive, effectively mitigating bias in deep nonlinear classifiers while maintaining tractability and interpretability. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.12091v8-abstract-full').style.display = 'none'; document.getElementById('2201.12091v8-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 January, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in ICML 2022; a revised version</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2201.11569">arXiv:2201.11569</a> <span> [<a href="https://arxiv.org/pdf/2201.11569">pdf</a>, <a href="https://arxiv.org/format/2201.11569">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3531146.3533127">10.1145/3531146.3533127 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Human Interpretation of Saliency-based Explanation Over Text </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Schuff%2C+H">Hendrik Schuff</a>, <a href="/search/cs?searchtype=author&query=Jacovi%2C+A">Alon Jacovi</a>, <a href="/search/cs?searchtype=author&query=Adel%2C+H">Heike Adel</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a>, <a href="/search/cs?searchtype=author&query=Vu%2C+N+T">Ngoc Thang Vu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2201.11569v2-abstract-short" style="display: inline;"> While a lot of research in explainable AI focuses on producing effective explanations, less work is devoted to the question of how people understand and interpret the explanation. In this work, we focus on this question through a study of saliency-based explanations over textual data. Feature-attribution explanations of text models aim to communicate which parts of the input text were more influen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.11569v2-abstract-full').style.display = 'inline'; document.getElementById('2201.11569v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2201.11569v2-abstract-full" style="display: none;"> While a lot of research in explainable AI focuses on producing effective explanations, less work is devoted to the question of how people understand and interpret the explanation. In this work, we focus on this question through a study of saliency-based explanations over textual data. Feature-attribution explanations of text models aim to communicate which parts of the input text were more influential than others towards the model decision. Many current explanation methods, such as gradient-based or Shapley value-based methods, provide measures of importance which are well-understood mathematically. But how does a person receiving the explanation (the explainee) comprehend it? And does their understanding match what the explanation attempted to communicate? We empirically investigate the effect of various factors of the input, the feature-attribution explanation, and visualization procedure, on laypeople's interpretation of the explanation. We query crowdworkers for their interpretation on tasks in English and German, and fit a GAMM model to their responses considering the factors of interest. We find that people often mis-interpret the explanations: superficial and unrelated factors, such as word length, influence the explainees' importance assignment despite the explanation communicating importance directly. We then show that some of this distortion can be attenuated: we propose a method to adjust saliencies based on model estimates of over- and under-perception, and explore bar charts as an alternative to heatmap saliency visualization. We find that both approaches can attenuate the distorting effect of specific factors, leading to better-calibrated understanding of the explanation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.11569v2-abstract-full').style.display = 'none'; document.getElementById('2201.11569v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 January, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">FAccT 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2201.11239">arXiv:2201.11239</a> <span> [<a href="https://arxiv.org/pdf/2201.11239">pdf</a>, <a href="https://arxiv.org/format/2201.11239">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1613/jair.1.14053">10.1613/jair.1.14053 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Diagnosing AI Explanation Methods with Folk Concepts of Behavior </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jacovi%2C+A">Alon Jacovi</a>, <a href="/search/cs?searchtype=author&query=Bastings%2C+J">Jasmijn Bastings</a>, <a href="/search/cs?searchtype=author&query=Gehrmann%2C+S">Sebastian Gehrmann</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a>, <a href="/search/cs?searchtype=author&query=Filippova%2C+K">Katja Filippova</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2201.11239v6-abstract-short" style="display: inline;"> We investigate a formalism for the conditions of a successful explanation of AI. We consider "success" to depend not only on what information the explanation contains, but also on what information the human explainee understands from it. Theory of mind literature discusses the folk concepts that humans use to understand and generalize behavior. We posit that folk concepts of behavior provide us wi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.11239v6-abstract-full').style.display = 'inline'; document.getElementById('2201.11239v6-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2201.11239v6-abstract-full" style="display: none;"> We investigate a formalism for the conditions of a successful explanation of AI. We consider "success" to depend not only on what information the explanation contains, but also on what information the human explainee understands from it. Theory of mind literature discusses the folk concepts that humans use to understand and generalize behavior. We posit that folk concepts of behavior provide us with a "language" that humans understand behavior with. We use these folk concepts as a framework of social attribution by the human explainee - the information constructs that humans are likely to comprehend from explanations - by introducing a blueprint for an explanatory narrative (Figure 1) that explains AI behavior with these constructs. We then demonstrate that many XAI methods today can be mapped to folk concepts of behavior in a qualitative evaluation. This allows us to uncover their failure modes that prevent current methods from explaining successfully - i.e., the information constructs that are missing for any given XAI method, and whose inclusion can decrease the likelihood of misunderstanding AI behavior. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.11239v6-abstract-full').style.display = 'none'; document.getElementById('2201.11239v6-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 January, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to JAIR (Vol. 78, 2023)</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Journal of Artificial Intelligence Research 73 (2023) 459-489 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2201.05320">arXiv:2201.05320</a> <span> [<a href="https://arxiv.org/pdf/2201.05320">pdf</a>, <a href="https://arxiv.org/format/2201.05320">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> CommonsenseQA 2.0: Exposing the Limits of AI through Gamification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Talmor%2C+A">Alon Talmor</a>, <a href="/search/cs?searchtype=author&query=Yoran%2C+O">Ori Yoran</a>, <a href="/search/cs?searchtype=author&query=Bras%2C+R+L">Ronan Le Bras</a>, <a href="/search/cs?searchtype=author&query=Bhagavatula%2C+C">Chandra Bhagavatula</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a>, <a href="/search/cs?searchtype=author&query=Choi%2C+Y">Yejin Choi</a>, <a href="/search/cs?searchtype=author&query=Berant%2C+J">Jonathan Berant</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2201.05320v1-abstract-short" style="display: inline;"> Constructing benchmarks that test the abilities of modern natural language understanding models is difficult - pre-trained language models exploit artifacts in benchmarks to achieve human parity, but still fail on adversarial examples and make errors that demonstrate a lack of common sense. In this work, we propose gamification as a framework for data construction. The goal of players in the game… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.05320v1-abstract-full').style.display = 'inline'; document.getElementById('2201.05320v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2201.05320v1-abstract-full" style="display: none;"> Constructing benchmarks that test the abilities of modern natural language understanding models is difficult - pre-trained language models exploit artifacts in benchmarks to achieve human parity, but still fail on adversarial examples and make errors that demonstrate a lack of common sense. In this work, we propose gamification as a framework for data construction. The goal of players in the game is to compose questions that mislead a rival AI while using specific phrases for extra points. The game environment leads to enhanced user engagement and simultaneously gives the game designer control over the collected data, allowing us to collect high-quality data at scale. Using our method we create CommonsenseQA 2.0, which includes 14,343 yes/no questions, and demonstrate its difficulty for models that are orders-of-magnitude larger than the AI used in the game itself. Our best baseline, the T5-based Unicorn with 11B parameters achieves an accuracy of 70.2%, substantially higher than GPT-3 (52.9%) in a few-shot inference setup. Both score well below human performance which is at 94.1%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.05320v1-abstract-full').style.display = 'none'; document.getElementById('2201.05320v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 January, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Presented as Oral at NeurIPS 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.14330">arXiv:2112.14330</a> <span> [<a href="https://arxiv.org/pdf/2112.14330">pdf</a>, <a href="https://arxiv.org/format/2112.14330">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Simple, Interpretable and Stable Method for Detecting Words with Usage Change across Corpora </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gonen%2C+H">Hila Gonen</a>, <a href="/search/cs?searchtype=author&query=Jawahar%2C+G">Ganesh Jawahar</a>, <a href="/search/cs?searchtype=author&query=Seddah%2C+D">Djam茅 Seddah</a>, <a href="/search/cs?searchtype=author&query=Goldberg%2C+Y">Yoav Goldberg</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.14330v1-abstract-short" style="display: inline;"> The problem of comparing two bodies of text and searching for words that differ in their usage between them arises often in digital humanities and computational social science. This is commonly approached by training word embeddings on each corpus, aligning the vector spaces, and looking for words whose cosine distance in the aligned space is large. However, these methods often require extensive f… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.14330v1-abstract-full').style.display = 'inline'; document.getElementById('2112.14330v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.14330v1-abstract-full" style="display: none;"> The problem of comparing two bodies of text and searching for words that differ in their usage between them arises often in digital humanities and computational social science. This is commonly approached by training word embeddings on each corpus, aligning the vector spaces, and looking for words whose cosine distance in the aligned space is large. However, these methods often require extensive filtering of the vocabulary to perform well, and - as we show in this work - result in unstable, and hence less reliable, results. We propose an alternative approach that does not use vector space alignment, and instead considers the neighbors of each word. The method is simple, interpretable and stable. We demonstrate its effectiveness in 9 different setups, considering different corpus splitting criteria (age, gender and profession of tweet authors, time of tweet) and different languages (English, French and Hebrew). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.14330v1-abstract-full').style.display = 'none'; document.getElementById('2112.14330v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in ACL 2020</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Goldberg%2C+Y&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Goldberg%2C+Y&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Goldberg%2C+Y&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Goldberg%2C+Y&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> </ul> </nav> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository