
Search | arXiv e-print repository

<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 124 results for author: <span class="mathjax">Augenstein, I</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/" aria-role="search"> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Augenstein, I"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Augenstein%2C+I&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Augenstein, I"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Augenstein%2C+I&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Augenstein%2C+I&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Augenstein%2C+I&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Augenstein%2C+I&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11995">arXiv:2502.11995</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.11995">pdf</a>, <a href="https://arxiv.org/format/2502.11995">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Presumed Cultural Identity: How Names Shape LLM Responses </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Pawar%2C+S">Siddhesh Pawar</a>, <a href="/search/?searchtype=author&amp;query=Arora%2C+A">Arnav Arora</a>, <a href="/search/?searchtype=author&amp;query=Kaffee%2C+L">Lucie-Aim茅e Kaffee</a>, <a href="/search/?searchtype=author&amp;query=Augenstein%2C+I">Isabelle Augenstein</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11995v1-abstract-short" style="display: inline;"> Names are deeply tied to human identity. They can serve as markers of individuality, cultural heritage, and personal history. However, using names as a core indicator of identity can lead to over-simplification of complex identities. When interacting with LLMs, user names are an important point of information for personalisation. Names can enter chatbot conversations through direct user input (req&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11995v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11995v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11995v1-abstract-full" style="display: none;"> Names are deeply tied to human identity. 
They can serve as markers of individuality, cultural heritage, and personal history. However, using names as a core indicator of identity can lead to over-simplification of complex identities. When interacting with LLMs, user names are an important point of information for personalisation. Names can enter chatbot conversations through direct user input (requested by chatbots), as part of task contexts such as CV reviews, or as built-in memory features that store user information for personalisation. We study biases associated with names by measuring cultural presumptions in the responses generated by LLMs when presented with common suggestion-seeking queries, which might involve making assumptions about the user. Our analyses demonstrate strong assumptions about cultural identity associated with names present in LLM generations across multiple cultures. Our work has implications for designing more nuanced personalisation systems that avoid reinforcing stereotypes while maintaining meaningful customisation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11995v1-abstract-full').style.display = 'none'; document.getElementById('2502.11995v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">23 Pages, 13 Figures, 4 Tables</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.7 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09083">arXiv:2502.09083</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.09083">pdf</a>, <a href="https://arxiv.org/format/2502.09083">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3706598.3713277">10.1145/3706598.3713277 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Show Me the Work: Fact-Checkers&#39; Requirements for Explainable Automated Fact-Checking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Warren%2C+G">Greta Warren</a>, <a href="/search/?searchtype=author&amp;query=Shklovski%2C+I">Irina Shklovski</a>, <a href="/search/?searchtype=author&amp;query=Augenstein%2C+I">Isabelle Augenstein</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09083v1-abstract-short" style="display: inline;"> The pervasiveness of large language models and generative AI in online media 
has amplified the need for effective automated fact-checking to assist fact-checkers in tackling the increasing volume and sophistication of misinformation. The complex nature of fact-checking demands that automated fact-checking systems provide explanations that enable fact-checkers to scrutinise their outputs. However,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09083v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09083v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09083v1-abstract-full" style="display: none;"> The pervasiveness of large language models and generative AI in online media has amplified the need for effective automated fact-checking to assist fact-checkers in tackling the increasing volume and sophistication of misinformation. The complex nature of fact-checking demands that automated fact-checking systems provide explanations that enable fact-checkers to scrutinise their outputs. However, it is unclear how these explanations should align with the decision-making and reasoning processes of fact-checkers to be effectively integrated into their workflows. Through semi-structured interviews with fact-checking professionals, we bridge this gap by: (i) providing an account of how fact-checkers assess evidence, make decisions, and explain their processes; (ii) examining how fact-checkers use automated tools in practice; and (iii) identifying fact-checker explanation requirements for automated fact-checking tools. The findings show unmet explanation needs and identify important criteria for replicable fact-checking explanations that trace the model&#39;s reasoning path, reference specific evidence, and highlight uncertainty and information gaps. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09083v1-abstract-full').style.display = 'none'; document.getElementById('2502.09083v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Conditionally accepted to CHI&#39;25</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07068">arXiv:2502.07068</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.07068">pdf</a>, <a href="https://arxiv.org/format/2502.07068">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Specializing Large Language Models to Simulate Survey Response Distributions for Global Populations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Cao%2C+Y">Yong Cao</a>, <a href="/search/?searchtype=author&amp;query=Liu%2C+H">Haijiang Liu</a>, <a href="/search/?searchtype=author&amp;query=Arora%2C+A">Arnav Arora</a>, <a href="/search/?searchtype=author&amp;query=Augenstein%2C+I">Isabelle Augenstein</a>, <a href="/search/?searchtype=author&amp;query=R%C3%B6ttger%2C+P">Paul R枚ttger</a>, <a href="/search/?searchtype=author&amp;query=Hershcovich%2C+D">Daniel Hershcovich</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07068v2-abstract-short" style="display: inline;"> Large-scale surveys are essential tools for informing social science research and policy, but running surveys is costly and time-intensive. If we could accurately simulate group-level survey results, this would therefore be very valuable to social science research. Prior work has explored the use of large language models (LLMs) for simulating human behaviors, mostly through prompting. In this pape&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07068v2-abstract-full').style.display = 'inline'; document.getElementById('2502.07068v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07068v2-abstract-full" style="display: none;"> Large-scale surveys are essential tools for informing social science research and policy, but running surveys is costly and time-intensive. If we could accurately simulate group-level survey results, this would therefore be very valuable to social science research. Prior work has explored the use of large language models (LLMs) for simulating human behaviors, mostly through prompting. In this paper, we are the first to specialize LLMs for the task of simulating survey response distributions. As a testbed, we use country-level results from two global cultural surveys. We devise a fine-tuning method based on first-token probabilities to minimize divergence between predicted and actual response distributions for a given question. Then, we show that this method substantially outperforms other methods and zero-shot classifiers, even on unseen questions, countries, and a completely unseen survey. While even our best models struggle with the task, especially on unseen questions, our results demonstrate the benefits of specialization for simulation, which may accelerate progress towards sufficiently accurate simulation in the future. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07068v2-abstract-full').style.display = 'none'; document.getElementById('2502.07068v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 9 figures, accepted to NAACL 2025 main</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18265">arXiv:2501.18265</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.18265">pdf</a>, <a href="https://arxiv.org/format/2501.18265">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Collecting Cost-Effective, High-Quality Truthfulness Assessments with LLM Summarized Evidence </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Roitero%2C+K">Kevin Roitero</a>, <a href="/search/?searchtype=author&amp;query=Wright%2C+D">Dustin Wright</a>, <a href="/search/?searchtype=author&amp;query=Soprano%2C+M">Michael Soprano</a>, <a href="/search/?searchtype=author&amp;query=Augenstein%2C+I">Isabelle Augenstein</a>, <a href="/search/?searchtype=author&amp;query=Mizzaro%2C+S">Stefano Mizzaro</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18265v1-abstract-short" style="display: inline;"> With the degradation of guardrails against mis- and disinformation online, it is more critical than ever to be able to effectively combat it. In this paper, we explore the efficiency and effectiveness of using crowd-sourced truthfulness assessments based on condensed, large language model (LLM) generated summaries of online sources. We compare the use of generated summaries to the use of original&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18265v1-abstract-full').style.display = 'inline'; document.getElementById('2501.18265v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.18265v1-abstract-full" style="display: none;"> With the degradation of guardrails against mis- and disinformation online, it is more critical than ever to be able to effectively combat it. In this paper, we explore the efficiency and effectiveness of using crowd-sourced truthfulness assessments based on condensed, large language model (LLM) generated summaries of online sources. 
4. arXiv:2501.18265 [pdf, other] (cs.IR, cs.CL, cs.HC)
   Collecting Cost-Effective, High-Quality Truthfulness Assessments with LLM Summarized Evidence
   Authors: Kevin Roitero, Dustin Wright, Michael Soprano, Isabelle Augenstein, Stefano Mizzaro
   Abstract: With the degradation of guardrails against mis- and disinformation online, it is more critical than ever to be able to effectively combat it. In this paper, we explore the efficiency and effectiveness of using crowd-sourced truthfulness assessments based on condensed, large language model (LLM) generated summaries of online sources. We compare the use of generated summaries to the use of original web pages in an A/B testing setting, where we employ a large and diverse pool of crowd-workers to perform the truthfulness assessment. We evaluate the quality of assessments, the efficiency with which assessments are performed, and the behavior and engagement of participants. Our results demonstrate that the Summary modality, which relies on summarized evidence, offers no significant change in assessment accuracy over the Standard modality, while significantly increasing the speed with which assessments are performed. Workers using summarized evidence produce a significantly higher number of assessments in the same time frame, reducing the cost needed to acquire truthfulness assessments. Additionally, the Summary modality maximizes both inter-annotator agreement and the reliance on and perceived usefulness of evidence, demonstrating the utility of summarized evidence without sacrificing the quality of assessments.
   Submitted 30 January, 2025; originally announced January 2025.
   Comments: 18 pages; 7 figures; 5 tables
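The reported A/B outcome (no significant accuracy change, significantly faster assessments for the Summary modality) corresponds to two routine significance tests. A minimal sketch with invented counts and timings, purely to make the comparison concrete; these are not the paper's numbers.

```python
from scipy import stats

# Accuracy: correct vs. incorrect assessments per modality (hypothetical counts).
table = [[410, 90],   # Summary:  correct, incorrect
         [402, 98]]   # Standard: correct, incorrect
chi2, p_acc, dof, expected = stats.chi2_contingency(table)

# Speed: seconds per assessment (hypothetical samples), Welch's t-test.
summary_times = [41, 38, 45, 36, 40, 43]
standard_times = [62, 70, 58, 66, 61, 69]
t, p_speed = stats.ttest_ind(summary_times, standard_times, equal_var=False)

print(f"accuracy difference: p = {p_acc:.3f}")    # large p: no detectable change
print(f"speed difference:    p = {p_speed:.4f}")  # small p: Summary is faster
```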
5. arXiv:2501.12275 [pdf, other] (cs.CV, cs.AI, cs.CR, cs.LG, cs.MA)
   With Great Backbones Comes Great Adversarial Transferability
   Authors: Erik Arakelyan, Karen Hambardzumyan, Davit Papikyan, Pasquale Minervini, Albert Gordo, Isabelle Augenstein, Aram H. Markosyan
   Abstract: Advances in self-supervised learning (SSL) for machine vision have improved representation robustness and model performance, giving rise to pre-trained backbones like \emph{ResNet} and \emph{ViT} models tuned with SSL methods such as \emph{SimCLR}. Due to the computational and data demands of pre-training, the utilization of such backbones becomes a strenuous necessity. However, employing these backbones may inherit vulnerabilities to adversarial attacks. While adversarial robustness has been studied under \emph{white-box} and \emph{black-box} settings, the robustness of models tuned on pre-trained backbones remains largely unexplored. Additionally, the role of tuning meta-information in mitigating exploitation risks is unclear. This work systematically evaluates the adversarial robustness of such models across $20,000$ combinations of tuning meta-information, including fine-tuning techniques, backbone families, datasets, and attack types. We propose using proxy models to transfer attacks, simulating varying levels of target knowledge by fine-tuning these proxies with diverse configurations. Our findings reveal that proxy-based attacks approach the effectiveness of \emph{white-box} methods, even with minimal tuning knowledge. We also introduce a naive "backbone attack," leveraging only the backbone to generate adversarial samples, which outperforms \emph{black-box} attacks and rivals \emph{white-box} methods, highlighting critical risks in model-sharing practices. Finally, our ablations reveal how increasing tuning meta-information impacts attack transferability, measuring each meta-information combination.
   Submitted 21 January, 2025; originally announced January 2025.
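The "backbone attack" idea (crafting adversarial examples from the shared backbone alone, with no access to the fine-tuned head) can be sketched as a single FGSM-style step that maximises feature distortion. The ResNet-18 below is a stand-in for whatever SSL backbone a victim was tuned from; this illustrates the concept, not the paper's attack code.

```python
import torch
import torchvision.models as models

backbone = models.resnet18(weights=None)   # stand-in for a shared pre-trained backbone
backbone.fc = torch.nn.Identity()          # drop the classification head: features only
backbone.eval()

def backbone_attack(x, eps=8 / 255):
    """One gradient step that pushes the input away from its clean features."""
    x_adv = x.clone().requires_grad_(True)
    clean_feat = backbone(x).detach()
    loss = torch.nn.functional.mse_loss(backbone(x_adv), clean_feat)
    loss.backward()
    # Ascend the feature-distortion loss, then clip to a valid image range.
    return (x_adv + eps * x_adv.grad.sign()).clamp(0, 1).detach()

x = torch.rand(1, 3, 224, 224)             # dummy image batch
x_adv = backbone_attack(x)                 # transfer this to any model tuned on the backbone
```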
6. arXiv:2412.17031 [pdf, other] (cs.CL, cs.AI)
   A Reality Check on Context Utilisation for Retrieval-Augmented Generation
   Authors: Lovisa Hagström, Sara Vera Marjanović, Haeun Yu, Arnav Arora, Christina Lioma, Maria Maistro, Pepa Atanasova, Isabelle Augenstein
   Abstract: Retrieval-augmented generation (RAG) helps address the limitations of the parametric knowledge embedded within a language model (LM). However, investigations of how LMs utilise retrieved information of varying complexity in real-world scenarios have been limited to synthetic contexts. We introduce DRUID (Dataset of Retrieved Unreliable, Insufficient and Difficult-to-understand contexts) with real-world queries and contexts manually annotated for stance. The dataset is based on the prototypical task of automated claim verification, for which automated retrieval of real-world evidence is crucial. We compare DRUID to synthetic datasets (CounterFact, ConflictQA) and find that artificial datasets often fail to represent the complex and diverse real-world context settings. We show that synthetic datasets exaggerate context characteristics rare in real retrieved data, which leads to inflated context utilisation results, as measured by our novel ACU score. Moreover, while previous work has mainly focused on singleton context characteristics to explain context utilisation, correlations between singleton context properties and ACU on DRUID are surprisingly small compared to other properties related to context source. Overall, our work underscores the need for real-world aligned context utilisation studies to represent and improve performance in real-world RAG settings.
   Submitted 22 December, 2024; originally announced December 2024.
   Comments: 43 pages, 18 figures

7. arXiv:2412.12318 [pdf, other] (cs.CL)
   Graph-Guided Textual Explanation Generation Framework
   Authors: Shuzhou Yuan, Jingyi Sun, Ran Zhang, Michael Färber, Steffen Eger, Pepa Atanasova, Isabelle Augenstein
   Abstract: Natural language explanations (NLEs) are commonly used to provide plausible free-text explanations of a model's reasoning about its predictions. However, recent work has questioned the faithfulness of NLEs, as they may not accurately reflect the model's internal reasoning process regarding its predicted answer. In contrast, highlight explanations -- input fragments identified as critical for the model's predictions -- exhibit measurable faithfulness, which has been incrementally improved through existing research. Building on this foundation, we propose G-Tex, a Graph-Guided Textual Explanation Generation framework designed to enhance the faithfulness of NLEs by leveraging highlight explanations. Specifically, highlight explanations are extracted as highly faithful cues representing the model's reasoning and are subsequently encoded through a graph neural network layer, which explicitly guides the NLE generation process. This alignment ensures that the generated explanations closely reflect the model's underlying reasoning. Experiments on T5 and BART using three reasoning datasets show that G-Tex improves NLE faithfulness by up to 17.59% compared to baseline methods. Additionally, G-Tex generates NLEs with greater semantic and lexical similarity to human-written ones. Human evaluations show that G-Tex can decrease redundant content and enhance the overall quality of NLEs. As our work introduces a novel method for explicitly guiding NLE generation to improve faithfulness, we hope it will serve as a stepping stone for addressing additional criteria for NLE and generated text overall.
   Submitted 16 December, 2024; originally announced December 2024.

8. arXiv:2411.00860 [pdf, other] (cs.CL, cs.CV)
   Survey of Cultural Awareness in Language Models: Text and Beyond
   Authors: Siddhesh Pawar, Junyeong Park, Jiho Jin, Arnav Arora, Junho Myung, Srishti Yadav, Faiz Ghifari Haznitrama, Inhwa Song, Alice Oh, Isabelle Augenstein
   Abstract: Large-scale deployment of large language models (LLMs) in various applications, such as chatbots and virtual assistants, requires LLMs to be culturally sensitive to the user to ensure inclusivity. Culture has been widely studied in psychology and anthropology, and there has been a recent surge in research on making LLMs more culturally inclusive that goes beyond multilinguality and builds on findings from psychology and anthropology. In this paper, we survey efforts towards incorporating cultural awareness into text-based and multimodal LLMs. We start by defining cultural awareness in LLMs, taking the definitions of culture from anthropology and psychology as a point of departure. We then examine methodologies adopted for creating cross-cultural datasets, strategies for cultural inclusion in downstream tasks, and methodologies that have been used for benchmarking cultural awareness in LLMs. Further, we discuss the ethical implications of cultural alignment, the role of Human-Computer Interaction in driving cultural inclusion in LLMs, and the role of cultural alignment in driving social science research. We finally provide pointers to future research based on our findings about gaps in the literature.
   Submitted 30 October, 2024; originally announced November 2024.
9. arXiv:2410.11900 [pdf, other] (cs.AI, cs.CL, cs.LG, cs.LO)
   FLARE: Faithful Logic-Aided Reasoning and Exploration
   Authors: Erik Arakelyan, Pasquale Minervini, Pat Verga, Patrick Lewis, Isabelle Augenstein
   Abstract: Modern Question Answering (QA) and Reasoning approaches based on Large Language Models (LLMs) commonly use prompting techniques, such as Chain-of-Thought (CoT), assuming the resulting generation will have a more granular exploration and reasoning over the question space and scope. However, such methods struggle with generating outputs that are faithful to the intermediate chain of reasoning produced by the model. On the other end of the spectrum, neuro-symbolic methods such as Faithful CoT (F-CoT) propose to combine LLMs with external symbolic solvers. While such approaches boast a high degree of faithfulness, they usually require a model trained for code generation and struggle with tasks that are ambiguous or hard to formalise strictly. We introduce $\textbf{F}$aithful $\textbf{L}$ogic-$\textbf{A}$ided $\textbf{R}$easoning and $\textbf{E}$xploration ($\textbf{FLARE}$), a novel interpretable approach for traversing the problem space using task decompositions. We use the LLM to plan a solution, soft-formalise the query into facts and predicates using logic programming code, and simulate that code execution using an exhaustive multi-hop search over the defined space. Our method allows us to compute the faithfulness of the reasoning process w.r.t. the generated code and analyse the steps of the multi-hop search without relying on external solvers. Our method achieves SOTA results on $\mathbf{7}$ out of $\mathbf{9}$ diverse reasoning benchmarks. We also show that model faithfulness positively correlates with overall performance and further demonstrate that $\textbf{FLARE}$ allows pinpointing the decisive factors sufficient for and leading to the correct answer with optimal reasoning during the multi-hop search.
   Submitted 21 January, 2025; v1 submitted 14 October, 2024; originally announced October 2024.
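FLARE's "soft-formalise and simulate" step amounts to exhaustively searching over facts and rules extracted from the query. A toy forward-chaining interpreter in that spirit, with made-up facts and a single rule; this is an illustration of the search idea, not the FLARE implementation (which derives the program with an LLM).

```python
# Facts and a Datalog-like rule; uppercase strings are variables.
facts = {("parent", "ann", "bob"), ("parent", "bob", "cara")}
rules = [(("grandparent", "X", "Z"),
          [("parent", "X", "Y"), ("parent", "Y", "Z")])]

def substitute(atom, binding):
    return tuple(binding.get(t, t) for t in atom)

def match(premises, binding, facts):
    """Exhaustively enumerate bindings satisfying all premises (the multi-hop search)."""
    if not premises:
        yield binding
        return
    first, rest = premises[0], premises[1:]
    for fact in facts:
        b = dict(binding)
        ok = True
        for t, f in zip(substitute(first, b), fact):
            if t.isupper():   # unbound variable: bind it to the fact's constant
                b[t] = f
            elif t != f:      # constant mismatch: this fact does not apply
                ok = False
                break
        if ok:
            yield from match(rest, b, facts)

def forward_chain(facts, rules):
    changed = True
    while changed:            # iterate rule application to a fixed point
        changed = False
        for head, body in rules:
            for b in match(body, {}, facts):
                new = substitute(head, b)
                if new not in facts:
                    facts.add(new)
                    changed = True
    return facts

print(forward_chain(set(facts), rules))  # derives ("grandparent", "ann", "cara")
```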
10. arXiv:2410.03001 [pdf, other] (cs.CL)
   Can Transformers Learn $n$-gram Language Models?
   Authors: Anej Svete, Nadav Borenstein, Mike Zhou, Isabelle Augenstein, Ryan Cotterell
   Abstract: Much theoretical work has described the ability of transformers to represent formal languages. However, linking theoretical results to empirical performance is not straightforward due to the complex interplay between the architecture, the learning algorithm, and training data. To test whether theoretical lower bounds imply \emph{learnability} of formal languages, we turn to recent work relating transformers to $n$-gram language models (LMs). We study transformers' ability to learn random $n$-gram LMs of two kinds: ones with arbitrary next-symbol probabilities and ones where those are defined with shared parameters. We find that classic estimation techniques for $n$-gram LMs such as add-$\lambda$ smoothing outperform transformers on the former, while transformers perform better on the latter, outperforming methods specifically designed to learn $n$-gram LMs.
   Submitted 3 October, 2024; originally announced October 2024.
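Add-$\lambda$ smoothing, the classic estimator the abstract says outperforms transformers on arbitrary $n$-gram LMs, is easy to state concretely. A minimal bigram version with a toy corpus and an arbitrary $\lambda$:

```python
from collections import Counter

corpus = "a b a b b a a b".split()
vocab = sorted(set(corpus))
lam = 0.5  # the smoothing constant lambda

bigrams = Counter(zip(corpus, corpus[1:]))  # c(prev, w)
unigrams = Counter(corpus[:-1])             # c(prev), counting only bigram histories

def p_add_lambda(w, prev):
    """P(w | prev) = (c(prev, w) + lam) / (c(prev) + lam * |V|)."""
    return (bigrams[(prev, w)] + lam) / (unigrams[prev] + lam * len(vocab))

# Smoothed probabilities form a proper distribution for any history.
assert abs(sum(p_add_lambda(w, "a") for w in vocab) - 1.0) < 1e-9
print({w: round(p_add_lambda(w, "a"), 3) for w in vocab})  # {'a': 0.3, 'b': 0.7}
```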
11. arXiv:2408.14317 [pdf, other] (cs.CL, cs.AI)
   Claim Verification in the Age of Large Language Models: A Survey
   Authors: Alphaeus Dmonte, Roland Oruche, Marcos Zampieri, Prasad Calyam, Isabelle Augenstein
   Abstract: The large and ever-increasing amount of data available on the Internet coupled with the laborious task of manual claim and fact verification has sparked the interest in the development of automated claim verification systems. Several deep learning and transformer-based models have been proposed for this task over the years. With the introduction of Large Language Models (LLMs) and their superior performance in several NLP tasks, we have seen a surge of LLM-based approaches to claim verification along with the use of novel methods such as Retrieval Augmented Generation (RAG). In this survey, we present a comprehensive account of recent claim verification frameworks using LLMs. We describe the different components of the claim verification pipeline used in these frameworks in detail, including common approaches to retrieval, prompting, and fine-tuning. Finally, we describe publicly available English datasets created for this task.
   Submitted 11 February, 2025; v1 submitted 26 August, 2024; originally announced August 2024.
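The pipeline decomposition the survey uses (retrieval, prompting, fine-tuning) implies a common skeleton for LLM-based claim verification. A schematic sketch: `retrieve` and `llm` are hypothetical stand-ins for whatever retriever and model a concrete system would plug in, and the label set follows the common FEVER-style convention.

```python
from typing import Callable, List

def verify_claim(claim: str,
                 retrieve: Callable[[str, int], List[str]],
                 llm: Callable[[str], str],
                 k: int = 5) -> str:
    evidence = retrieve(claim, k)                        # retrieval component
    prompt = (                                           # prompting component
        "Given the evidence, label the claim SUPPORTED, REFUTED, "
        "or NOT ENOUGH INFO.\n\n"
        + "\n".join(f"- {e}" for e in evidence)
        + f"\n\nClaim: {claim}\nLabel:"
    )
    verdict = llm(prompt).strip().upper()                # generation component
    labels = {"SUPPORTED", "REFUTED", "NOT ENOUGH INFO"}
    return verdict if verdict in labels else "NOT ENOUGH INFO"

# Toy run with stub components standing in for a real retriever and LLM.
stub_retrieve = lambda q, k: ["The Eiffel Tower is in Paris."]
stub_llm = lambda p: "SUPPORTED"
print(verify_claim("The Eiffel Tower is in Paris.", stub_retrieve, stub_llm))
```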
To study the effect of intra-memory conflict on an LM's ability to accept relevant context, we utilize two knowledge conflict measures and a novel dataset containing inherently conflicting data, DynamicQA. This dataset includes facts with a temporal dynamic nature, where facts can change over time, and disputable dynamic facts, which can change depending on the viewpoint. DynamicQA is the first dataset to include real-world knowledge conflicts and to provide context for studying the link between the different types of knowledge conflict. We also evaluate several measures on their ability to reflect the presence of intra-memory conflict: semantic entropy and a novel coherent persuasion score. With our extensive experiments, we verify that LMs exhibit a greater degree of intra-memory conflict with dynamic facts than with facts that have a single truth value. Furthermore, we reveal that facts with intra-memory conflict are harder to update with context, suggesting that retrieval-augmented generation will struggle with the most commonly adapted facts.
Submitted 7 October, 2024; v1 submitted 24 July, 2024; originally announced July 2024.
Comments: 15 pages, 6 figures; accepted to Findings of EMNLP 2024
MSC Class: 68T50; ACM Class: I.2.7
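Semantic entropy, one of the two conflict measures mentioned, clusters sampled answers by meaning and computes the entropy over those clusters. A minimal sketch follows, assuming a user-supplied semantic-equivalence test (for example bidirectional NLI entailment); this is the general technique, not necessarily the paper's exact implementation.

    import math
    from collections import Counter

    def semantic_entropy(samples, equivalent):
        """Entropy over meaning-clusters of answers sampled for one question.

        `equivalent(a, b)` is a user-supplied semantic-equivalence test;
        string matching below is only a stand-in.
        """
        clusters = []   # representative answer per meaning-cluster
        labels = []
        for s in samples:
            for i, rep in enumerate(clusters):
                if equivalent(s, rep):
                    labels.append(i)
                    break
            else:
                clusters.append(s)
                labels.append(len(clusters) - 1)
        counts = Counter(labels)
        n = len(samples)
        return -sum((c / n) * math.log(c / n) for c in counts.values())

    # toy usage: case-insensitive string equality as the equivalence test
    print(semantic_entropy(["Paris", "paris", "Lyon"],
                           lambda a, b: a.lower() == b.lower()))  # ~0.64

High semantic entropy indicates that the model's sampled answers disagree in meaning, which is one plausible signal of intra-memory conflict.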
arXiv:2406.19238 [pdf, other] (https://arxiv.org/abs/2406.19238)
Revealing Fine-Grained Values and Opinions in Large Language Models
Authors: Dustin Wright, Arnav Arora, Nadav Borenstein, Srishti Yadav, Serge Belongie, Isabelle Augenstein
Subjects: cs.CL (Computation and Language); cs.CY (Computers and Society); cs.LG (Machine Learning)
Abstract: Uncovering latent values and opinions embedded in large language models (LLMs) can help identify biases and mitigate potential harm. Recently, this has been approached by prompting LLMs with survey questions and quantifying the stances in the outputs towards morally and politically charged statements. However, the stances generated by LLMs can vary greatly depending on how they are prompted, and there are many ways to argue for or against a given position. In this work, we propose to address this by analysing a large and robust dataset of 156k LLM responses, generated by 6 LLMs using 420 prompt variations, to the 62 propositions of the Political Compass Test (PCT). We perform coarse-grained analysis of the generated stances and fine-grained analysis of the plain-text justifications for those stances.
For the fine-grained analysis, we propose to identify tropes in the responses: semantically similar phrases that are recurrent and consistent across different prompts, revealing natural patterns in the text that a given LLM is prone to produce. We find that demographic features added to prompts significantly affect outcomes on the PCT, reflecting bias, and that there are disparities between test results when eliciting closed-form versus open-domain responses. Additionally, patterns in the plain-text rationales surfaced via tropes show that similar justifications are repeatedly generated across models and prompts, even with disparate stances.
Submitted 31 October, 2024; v1 submitted 27 June, 2024; originally announced June 2024.
Comments: Findings of EMNLP 2024; 28 pages, 20 figures, 7 tables
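Trope mining of the kind described could be approximated by clustering sentence embeddings of the justification phrases and keeping clusters that recur across prompts. The model name, distance threshold, and library choices in this sketch are assumptions of ours, not the paper's setup.

    from sentence_transformers import SentenceTransformer
    from sklearn.cluster import AgglomerativeClustering

    def find_tropes(phrases, distance_threshold=0.4):
        """Group semantically similar phrases; recurring groups are candidate tropes."""
        model = SentenceTransformer("all-MiniLM-L6-v2")  # illustrative model choice
        emb = model.encode(phrases, normalize_embeddings=True)
        clustering = AgglomerativeClustering(
            n_clusters=None,
            distance_threshold=distance_threshold,
            metric="cosine",
            linkage="average",
        ).fit(emb)
        groups = {}
        for phrase, label in zip(phrases, clustering.labels_):
            groups.setdefault(label, []).append(phrase)
        # a "trope" here = a cluster containing more than one response phrase
        return [g for g in groups.values() if len(g) > 1]

In practice one would also track which prompt variation each phrase came from, so that a cluster only counts as a trope when it spans many distinct prompts.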
arXiv:2406.17753 [pdf, other] (https://arxiv.org/abs/2406.17753)
Measuring and Benchmarking Large Language Models' Capabilities to Generate Persuasive Language
Authors: Amalie Brogaard Pauli, Isabelle Augenstein, Ira Assent
Subjects: cs.CL (Computation and Language); cs.AI (Artificial Intelligence)
Abstract: We are exposed to a great deal of information that tries to influence us, such as teaser messages, debates, politically framed news, and propaganda, all of which use persuasive language. With the recent interest in Large Language Models (LLMs), we study their ability to produce persuasive text. In contrast to prior work, which focuses on particular domains or types of persuasion, we conduct a general study across various domains to measure and benchmark the degree to which LLMs produce persuasive language, both when explicitly instructed to rewrite text to be more or less persuasive and when only instructed to paraphrase. We construct a new dataset, Persuasive-Pairs, consisting of pairs of a short text and its rewrite by an LLM to amplify or diminish persuasive language. We multi-annotate the pairs on a relative scale for persuasive language: a valuable resource in itself, and one we use to train a regression model to score and benchmark persuasive language, including for new LLMs across domains. In our analysis, we find that different 'personas' in LLaMA3's system prompt change the persuasive language substantially, even when the model is only instructed to paraphrase.
Submitted 16 October, 2024; v1 submitted 25 June, 2024; originally announced June 2024.
arXiv:2406.15085 [pdf, other] (https://arxiv.org/abs/2406.15085)
Evaluating Input Feature Explanations through a Unified Diagnostic Evaluation Framework
Authors: Jingyi Sun, Pepa Atanasova, Isabelle Augenstein
Subjects: cs.CL (Computation and Language)
Abstract: Explaining the decision-making process of machine learning models is crucial for ensuring their reliability and transparency for end users. One popular explanation form highlights key input features, such as i) tokens (e.g., Shapley Values and Integrated Gradients), ii) interactions between tokens (e.g., Bivariate Shapley and attention-based methods), or iii) interactions between spans of the input (e.g., Louvain Span Interactions). However, these explanation types have only been studied in isolation, making it difficult to judge their respective applicability. To bridge this gap, we develop a unified framework comprising four diagnostic properties that facilitates an automated, direct comparison between highlight and interactive explanations. We conduct an extensive analysis across these three types of input feature explanations, each utilizing three different explanation techniques, across two datasets and two models, and reveal that each explanation type has distinct strengths across the different diagnostic properties. Nevertheless, interactive span explanations outperform the other types of input feature explanations across most diagnostic properties. Although these explanation types are relatively understudied, our analysis underscores the need for further research to improve the methods that generate them. Additionally, integrating them with other explanation types that perform better on certain characteristics could further enhance their overall effectiveness.
Submitted 7 February, 2025; v1 submitted 21 June, 2024; originally announced June 2024.
MSC Class: 68T50; ACM Class: I.2.7
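Integrated Gradients, one of the token-highlight methods named above, can be sketched generically for any differentiable scoring function over input embeddings. This is an illustration of the technique itself, not the paper's evaluation code.

    import torch

    def integrated_gradients(score_fn, x, baseline=None, steps=50):
        """Approximate IG: (x - x') * mean over k of grad f(x' + (k/m)(x - x'))."""
        if baseline is None:
            baseline = torch.zeros_like(x)
        total = torch.zeros_like(x)
        for k in range(1, steps + 1):
            point = (baseline + (k / steps) * (x - baseline)).detach().requires_grad_(True)
            score_fn(point).backward()
            total += point.grad
        return (x - baseline) * total / steps

    # toy usage: attribute a linear score over three "token embeddings"
    w = torch.tensor([[1.0], [2.0], [-1.0]])
    x = torch.tensor([[0.5], [0.5], [0.5]])
    attr = integrated_gradients(lambda z: (z * w).sum(), x)
    print(attr)  # ~[0.5, 1.0, -0.5]; attributions sum to f(x) - f(baseline)

The completeness property visible in the toy example (attributions summing to the score difference) is one of the diagnostic-style checks such frameworks rely on.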
arXiv:2406.14425 [pdf, other] (https://arxiv.org/abs/2406.14425)
SynDARin: Synthesising Datasets for Automated Reasoning in Low-Resource Languages
Authors: Gayane Ghazaryan, Erik Arakelyan, Pasquale Minervini, Isabelle Augenstein
Subjects: cs.CL (Computation and Language); cs.AI (Artificial Intelligence); cs.LG (Machine Learning)
Abstract: Question Answering (QA) datasets have been instrumental in developing and evaluating Large Language Model (LLM) capabilities. However, such datasets are scarce for languages other than English due to the cost and difficulty of collection and manual annotation. This means that producing novel models and measuring the performance of multilingual LLMs in low-resource languages is challenging. To mitigate this, we propose $\textbf{S}$yn$\textbf{DAR}$in, a method for generating and validating QA datasets for low-resource languages. We utilize parallel content mining to obtain $\textit{human-curated}$ paragraphs between English and the target language. We use the English data as context to $\textit{generate}$ synthetic multiple-choice (MC) question-answer pairs, which are automatically translated and further validated for quality. Combining these with their designated non-English $\textit{human-curated}$ paragraphs forms the final QA dataset. The method allows us to maintain content quality, reduces the likelihood of factual errors, and circumvents the need for costly annotation.
To test the method, we created a QA dataset with $1.2$K samples for the Armenian language. The human evaluation shows that $98\%$ of the generated English data maintains quality and diversity in question types and topics, while the translation validation pipeline can filter out $\sim70\%$ of data of poor quality. We use the dataset to benchmark state-of-the-art LLMs, showing their inability to achieve human accuracy, with some models performing close to random chance. This shows that the generated dataset is non-trivial and can be used to evaluate reasoning capabilities in low-resource languages.
Submitted 16 September, 2024; v1 submitted 20 June, 2024; originally announced June 2024.
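The abstract does not spell out the validation criterion, so as a hedged sketch only: a translation-validation filter of this general kind might be realised as a round-trip check, keeping a translated pair only when the back-translation stays close to the English source. The `translate` interface, language codes, and threshold below are all assumptions of ours.

    from difflib import SequenceMatcher

    def validate_translation(english_src, translated, translate, threshold=0.8):
        """Round-trip check: back-translate and compare to the English source."""
        back = translate(translated, src="hy", tgt="en")  # e.g. Armenian -> English
        ratio = SequenceMatcher(None, english_src.lower(), back.lower()).ratio()
        return ratio >= threshold

    def filter_pairs(pairs, translate):
        """Keep only QA pairs whose translation survives round-trip checking."""
        return [(en, tr) for en, tr in pairs if validate_translation(en, tr, translate)]

A stricter pipeline would replace the string-overlap ratio with an embedding or NLI similarity, but the filtering structure stays the same.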
arXiv:2406.04289 [pdf, other] (https://arxiv.org/abs/2406.04289)
What Languages are Easy to Language-Model? A Perspective from Learning Probabilistic Regular Languages
Authors: Nadav Borenstein, Anej Svete, Robin Chan, Josef Valvoda, Franz Nowak, Isabelle Augenstein, Eleanor Chodroff, Ryan Cotterell
Subjects: cs.CL (Computation and Language)
Abstract: What can large language models learn? By definition, language models (LMs) are distributions over strings. Therefore, an intuitive way of addressing the above question is to formalize it as a matter of learnability of classes of distributions over strings. While prior work in this direction focused on assessing theoretical limits, we seek to understand empirical learnability. Unlike prior empirical work, we evaluate neural LMs on their home turf, learning probabilistic languages, rather than as classifiers of formal languages. In particular, we investigate the learnability of regular LMs (RLMs) by RNN and Transformer LMs. We empirically test the learnability of RLMs as a function of various complexity parameters of the RLM and the hidden state size of the neural LM. We find that the RLM rank, which corresponds to the size of the linear space spanned by the logits of its conditional distributions, and the expected length of sampled strings are strong and significant predictors of learnability for both RNNs and Transformers. Several other predictors also reach significance, but with differing patterns between RNNs and Transformers.
Submitted 12 January, 2025; v1 submitted 6 June, 2024; originally announced June 2024.
Comments: Accepted to ACL 2024
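Following the definition given in the abstract, the RLM rank can be computed by stacking the conditional next-symbol distributions (one row per state) and taking the rank of the spanned space of their logits. The toy probabilities below are ours, and log-probabilities stand in for logits.

    import numpy as np

    def rlm_rank(cond_probs, eps=1e-12):
        """cond_probs: (num_states, vocab_size) rows of next-symbol probabilities."""
        logits = np.log(cond_probs + eps)
        # rank of the linear space spanned by the conditional log-distributions
        return np.linalg.matrix_rank(logits)

    P = np.array([[0.7, 0.2, 0.1],
                  [0.1, 0.8, 0.1],
                  [0.7, 0.2, 0.1]])  # state 2 duplicates state 0
    print(rlm_rank(P))  # 2: only two linearly independent rows

Intuitively, a low-rank RLM has few genuinely distinct next-symbol behaviours, which is why rank plausibly predicts how easily a neural LM can fit it.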
arXiv:2404.18655 [pdf, other] (https://arxiv.org/abs/2404.18655)
Revealing the Parametric Knowledge of Language Models: A Unified Framework for Attribution Methods
Authors: Haeun Yu, Pepa Atanasova, Isabelle Augenstein
Subjects: cs.CL (Computation and Language); cs.AI (Artificial Intelligence)
Abstract: Language Models (LMs) acquire parametric knowledge from their training process, embedding it within their weights. The increasing scale of LMs, however, poses significant challenges for understanding a model's inner workings, and further for updating or correcting this embedded knowledge without the significant cost of retraining. This underscores the importance of unveiling exactly what knowledge is stored and its association with specific model components. Instance Attribution (IA) and Neuron Attribution (NA) offer insights into this training-acquired knowledge, though they have not been compared systematically. Our study introduces a novel evaluation framework to quantify and compare the knowledge revealed by IA and NA. To align the results of the two methods, we introduce the attribution method NA-Instances, which applies NA to retrieve influential training instances, and IA-Neurons, which discovers important neurons of the influential instances identified by IA. We further propose a comprehensive list of faithfulness tests to evaluate the comprehensiveness and sufficiency of the explanations provided by both methods. Through extensive experiments and analysis, we demonstrate that NA generally reveals more diverse and comprehensive information about the LM's parametric knowledge than IA. Nevertheless, IA provides unique and valuable insights into the LM's parametric knowledge which are not revealed by NA. Our findings further suggest the potential of a synergistic approach combining the diverse findings of IA and NA for a more holistic understanding of an LM's parametric knowledge.
Submitted 29 April, 2024; originally announced April 2024.
Comments: 14 pages, 6 figures
MSC Class: 68T50; ACM Class: I.2.7
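As a generic illustration of Instance Attribution (not one of the specific methods compared in the paper), a common instantiation scores training instances by the similarity between their loss gradients and the test instance's gradient; `loss_fn` is a placeholder interface.

    import torch

    def grad_vector(model, loss_fn, batch):
        """Flattened gradient of the loss on one batch w.r.t. all parameters."""
        model.zero_grad()
        loss_fn(model, batch).backward()
        return torch.cat([p.grad.flatten() for p in model.parameters()
                          if p.grad is not None])

    def instance_attribution(model, loss_fn, train_batches, test_batch, top_k=5):
        g_test = grad_vector(model, loss_fn, test_batch)
        scores = [(i, torch.dot(grad_vector(model, loss_fn, b), g_test).item())
                  for i, b in enumerate(train_batches)]
        return sorted(scores, key=lambda s: -s[1])[:top_k]  # most influential first

Neuron Attribution methods instead score individual hidden units, which is why the paper needs bridging methods like NA-Instances and IA-Neurons to compare the two families on common ground.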
arXiv:2402.14177 [pdf, other] (https://arxiv.org/abs/2402.14177)
Investigating Human Values in Online Communities
Authors: Nadav Borenstein, Arnav Arora, Lucie-Aimée Kaffee, Isabelle Augenstein
Subjects: cs.SI (Social and Information Networks); cs.CY (Computers and Society)
Abstract: Studying human values is instrumental for cross-cultural research, enabling a better understanding of the preferences and behaviour of society at large and of the communities therein. To study the dynamics of online communities, we propose a method to computationally analyse the values present on Reddit. Our method allows analysis at scale, complementing survey-based approaches. We train a value relevance and a value polarity classifier, which we thoroughly evaluate using in-domain and out-of-domain human annotations. Using these, we automatically annotate over six million posts across 12k subreddits with Schwartz values. Our analysis unveils both previously recorded and novel insights into the values prevalent within various online communities. For instance, we discover a very negative stance towards conformity in the Vegan and AbolishTheMonarchy subreddits. Additionally, our study of geographically specific subreddits highlights the correlation between traditional values and conservative U.S. states.
Through our work, we demonstrate how our dataset and method can be used as a complementary tool for the qualitative study of online communication.
Submitted 21 November, 2024; v1 submitted 21 February, 2024; originally announced February 2024.

arXiv:2402.13006 [pdf, other] (https://arxiv.org/abs/2402.13006)
Investigating the Impact of Model Instability on Explanations and Uncertainty
Authors: Sara Vera Marjanović, Isabelle Augenstein, Christina Lioma
Subjects: cs.LG (Machine Learning); cs.CL (Computation and Language)
Abstract: Explainable AI methods facilitate the understanding of model behaviour; yet, small, imperceptible perturbations to inputs can vastly distort explanations. As these explanations are typically evaluated holistically, before model deployment, it is difficult to assess when a particular explanation is trustworthy. Some studies have tried to create confidence estimators for explanations, but none have investigated an existing link between uncertainty and explanation quality. We artificially simulate epistemic uncertainty in text input by introducing noise at inference time. In this large-scale empirical study, we insert different levels of noise perturbations and measure the effect on the output of pre-trained language models and on different uncertainty metrics.
Realistic perturbations have a minimal effect on performance and explanations, whereas masking has a drastic effect. We find that high uncertainty does not necessarily imply low explanation plausibility; the correlation between the two metrics can be moderately positive when noise is exposed during the training process. This suggests that noise-augmented models may be better at identifying salient tokens when uncertain. Furthermore, when predictive and epistemic uncertainty measures are over-confident, the robustness of a saliency map to perturbation can indicate model stability issues. Integrated Gradients shows the greatest overall robustness to perturbation while still showing model-specific patterns in performance; however, this phenomenon is limited to smaller Transformer-based language models.
Submitted 4 June, 2024; v1 submitted 20 February, 2024; originally announced February 2024.
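The noise-at-inference setup can be sketched as follows: add Gaussian noise to the input embeddings and track an uncertainty metric such as predictive entropy. The noise scale, number of runs, and the `model(embeddings) -> logits` interface are illustrative assumptions; the paper also studies masking and further uncertainty measures.

    import torch
    import torch.nn.functional as F

    def predictive_entropy(logits):
        """Entropy of the softmax distribution, per example."""
        p = F.softmax(logits, dim=-1)
        return -(p * p.log()).sum(dim=-1)

    def entropy_under_noise(model, embeddings, sigma=0.01, n_runs=10):
        """Average predictive entropy across noisy forward passes."""
        ents = []
        with torch.no_grad():
            for _ in range(n_runs):
                noisy = embeddings + sigma * torch.randn_like(embeddings)
                ents.append(predictive_entropy(model(noisy)))
        return torch.stack(ents).mean(dim=0)

Sweeping `sigma` from realistic to aggressive values is what lets one separate the mild effect of plausible noise from the drastic effect of masking reported above.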
arXiv:2402.12431 [pdf, other] (https://arxiv.org/abs/2402.12431)
Understanding Fine-grained Distortions in Reports of Scientific Findings
Authors: Amelie Wührl, Dustin Wright, Roman Klinger, Isabelle Augenstein
Subjects: cs.CL (Computation and Language)
Abstract: Distorted science communication harms individuals and society, as it can lead to unhealthy behaviour change and decrease trust in scientific institutions. Given the rapidly increasing volume of science communication in recent years, a fine-grained understanding of how findings from scientific publications are reported to the general public, along with methods to automatically detect distortions from the original work, is crucial. Prior work focused on individual aspects of distortion or worked with unpaired data. In this work, we make three foundational contributions towards addressing this problem: (1) annotating 1,600 instances of scientific findings from academic papers, paired with the corresponding findings as reported in news articles and tweets, with respect to four characteristics: causality, certainty, generality and sensationalism; (2) establishing baselines for automatically detecting these characteristics; and (3) analyzing the prevalence of changes in these characteristics in both human-annotated and large-scale unlabeled data. Our results show that scientific findings frequently undergo subtle distortions when reported. Tweets distort findings more often than science news reports. Detecting fine-grained distortions automatically poses a challenging task. In our experiments, fine-tuned task-specific models consistently outperform few-shot LLM prompting.
Submitted 19 February, 2024; originally announced February 2024.

arXiv:2401.14440 [pdf, other] (https://arxiv.org/abs/2401.14440)
Semantic Sensitivities and Inconsistent Predictions: Measuring the Fragility of NLI Models
Authors: Erik Arakelyan, Zhaoqi Liu, Isabelle Augenstein
Subjects: cs.CL (Computation and Language); cs.AI (Artificial Intelligence); cs.CY (Computers and Society); cs.LG (Machine Learning)
Abstract: Recent studies of the emergent capabilities of transformer-based Natural Language Understanding (NLU) models have indicated that they have an understanding of lexical and compositional semantics.
We provide evidence suggesting that these claims should be taken with a grain of salt: we find that state-of-the-art Natural Language Inference (NLI) models are sensitive to minor semantics-preserving surface-form variations, which lead to sizable inconsistencies in model decisions during inference. Notably, this behaviour differs from valid, in-depth comprehension of compositional semantics, yet it emerges neither when evaluating model accuracy on standard benchmarks nor when probing for syntactic, monotonic, and logically robust reasoning. We propose a novel framework to measure the extent of semantic sensitivity. To this end, we evaluate NLI models on adversarially generated examples containing minor semantics-preserving surface-form input noise. This is achieved using conditional text generation, with the explicit condition that the NLI model predicts the relationship between the original and adversarial inputs as a symmetric equivalence entailment. We systematically study the effects of this phenomenon across NLI models for $\textbf{in-}$ and $\textbf{out-of-}$ domain settings. Our experiments show that semantic sensitivity causes performance degradations of $12.92\%$ and $23.71\%$ on average over $\textbf{in-}$ and $\textbf{out-of-}$ domain settings, respectively. We further perform ablation studies, analysing this phenomenon across models, datasets, and variations in inference, and show that semantic sensitivity can lead to major inconsistency within model predictions.
Submitted 31 January, 2024; v1 submitted 25 January, 2024; originally announced January 2024.
Comments: EACL 2024
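The core consistency check behind such a framework reduces to a few lines: a model is semantically sensitive on an example if a meaning-preserving rewrite flips its prediction. In this sketch, `predict_nli` is a placeholder for any NLI classifier, and the paraphrases are assumed to be given (the paper generates them adversarially).

    def semantic_sensitivity_rate(examples, predict_nli):
        """examples: (premise, hypothesis, paraphrased_hypothesis) triples.

        Returns the fraction of examples whose predicted label changes
        under a semantics-preserving rewrite of the hypothesis.
        """
        flips = sum(
            predict_nli(premise, hyp) != predict_nli(premise, paraphrase)
            for premise, hyp, paraphrase in examples
        )
        return flips / len(examples)

The reported in-domain and out-of-domain degradations correspond to how often this kind of flip occurs under adversarially chosen, entailment-certified rewrites.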
arXiv:2311.18567 [pdf, other] (https://arxiv.org/abs/2311.18567)
The Causal Influence of Grammatical Gender on Distributional Semantics
Authors: Karolina Stańczak, Kevin Du, Adina Williams, Isabelle Augenstein, Ryan Cotterell
Subjects: cs.CL (Computation and Language)
Abstract: How much meaning influences gender assignment across languages is an active area of research in linguistics and cognitive science. Current approaches can be viewed as aiming to determine where gender assignment falls on a spectrum, from being fully arbitrary to being largely semantically determined. For the latter case, there is a formulation of the neo-Whorfian hypothesis, which claims that even inanimate noun gender influences how people conceive of and talk about objects (using the choice of adjective used to modify inanimate nouns as a proxy for meaning). We offer a novel causal graphical model that jointly represents the interactions between a noun's grammatical gender, its meaning, and adjective choice. In accordance with past results, we find a significant relationship between the gender of nouns and the adjectives that modify them. However, when we control for the meaning of the noun, the relationship between grammatical gender and adjective choice is near zero and insignificant.
Submitted 22 October, 2024; v1 submitted 30 November, 2023; originally announced November 2023.

arXiv:2311.17627 [pdf, other] (https://arxiv.org/abs/2311.17627)
Invisible Women in Digital Diplomacy: A Multidimensional Framework for Online Gender Bias Against Women Ambassadors Worldwide
Authors: Yevgeniy Golovchenko, Karolina Stańczak, Rebecca Adler-Nissen, Patrice Wangen, Isabelle Augenstein
Subjects: cs.SI (Social and Information Networks); cs.CY (Computers and Society)
Abstract: Despite mounting evidence that women in foreign policy often bear the brunt of online hostility, the extent of online gender bias against diplomats remains unexplored. This paper offers the first global analysis of the treatment of women diplomats on social media. Introducing a multidimensional and multilingual methodology for studying online gender bias, it focuses on three critical elements: gendered language, negativity in tweets directed at diplomats, and the visibility of women diplomats. Our unique dataset encompasses ambassadors from 164 countries, their tweets, and the direct responses to these tweets in 65 different languages. Using automated content and sentiment analysis, our findings reveal a crucial gender bias.
The language in responses to diplomatic tweets is only mildly gendered and largely pertains to international affairs, and women ambassadors do not, in general, receive more negative reactions to their tweets than men. Yet the pronounced discrepancy in online visibility stands out as a significant form of gender bias: women receive a staggering 66.4% fewer retweets than men. By unraveling the invisibility that obscures women diplomats on social media, we hope to spark further research on online bias in international politics.
Submitted 29 November, 2023; originally announced November 2023.

arXiv:2311.09090 [pdf, other] (https://arxiv.org/abs/2311.09090)
Social Bias Probing: Fairness Benchmarking for Language Models
Authors: Marta Marchiori Manerba, Karolina Stańczak, Riccardo Guidotti, Isabelle Augenstein
Subjects: cs.CL (Computation and Language)
Abstract: While the impact of social biases in language models has been recognized, prior methods for bias evaluation have been limited to binary association tests on small datasets, limiting our understanding of bias complexities. This paper proposes a novel framework for probing language models for social biases by assessing disparate treatment, which involves treating individuals differently according to their affiliation with a sensitive demographic group. We curate SoFa, a large-scale benchmark designed to address the limitations of existing fairness collections.
SoFa expands the analysis beyond the binary comparison of stereotypical versus anti-stereotypical identities to include a diverse range of identities and stereotypes. Comparing our methodology with existing benchmarks, we reveal that biases within language models are more nuanced than acknowledged, indicating a broader scope of encoded biases than previously recognized. Benchmarking LMs on SoFa, we expose how identities expressing different religions lead to the most pronounced disparate treatment across all models. Finally, our findings indicate that real-life adversities faced by various groups, such as women and people with disabilities, are mirrored in the behaviour of these models.
Submitted 7 October, 2024; v1 submitted 15 November, 2023; originally announced November 2023.

arXiv:2311.09000 [pdf, other] (https://arxiv.org/abs/2311.09000)
Factcheck-Bench: Fine-Grained Evaluation Benchmark for Automatic Fact-checkers
Authors: Yuxia Wang, Revanth Gangi Reddy, Zain Muhammad Mujahid, Arnav Arora, Aleksandr Rubashevskii, Jiahui Geng, Osama Mohammed Afzal, Liangming Pan, Nadav Borenstein, Aditya Pillai, Isabelle Augenstein, Iryna Gurevych, Preslav Nakov
Subjects: cs.CL (Computation and Language)
Abstract: The increased use of large language models (LLMs) across a variety of real-world applications calls for mechanisms to verify the factual accuracy of their outputs.
arXiv:2311.09000 [pdf, other] cs.CL
Factcheck-Bench: Fine-Grained Evaluation Benchmark for Automatic Fact-checkers
Authors: Yuxia Wang, Revanth Gangi Reddy, Zain Muhammad Mujahid, Arnav Arora, Aleksandr Rubashevskii, Jiahui Geng, Osama Mohammed Afzal, Liangming Pan, Nadav Borenstein, Aditya Pillai, Isabelle Augenstein, Iryna Gurevych, Preslav Nakov
Abstract: The increased use of large language models (LLMs) across a variety of real-world applications calls for mechanisms to verify the factual accuracy of their outputs. In this work, we present a holistic end-to-end solution for annotating the factuality of LLM-generated responses, which encompasses a multi-stage annotation scheme designed to yield detailed labels concerning the verifiability and factual inconsistencies found in LLM outputs. We further construct an open-domain, document-level factuality benchmark with three levels of granularity: claim, sentence, and document, aiming to facilitate the evaluation of automatic fact-checking systems. Preliminary experiments show that FacTool, FactScore, and Perplexity.ai struggle to identify false claims, with the best F1 of 0.63 achieved by this annotation solution based on GPT-4. The annotation tool, benchmark, and code are available at https://github.com/yuxiaw/Factcheck-GPT.
Submitted 16 April, 2024; v1 submitted 15 November, 2023; originally announced November 2023.
Comments: 30 pages, 13 figures
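Claim-level scores such as the F1 quoted above are ordinarily standard precision/recall over false-claim detection. A plausible way to compute them, with a hypothetical verdict format invented for illustration:

from sklearn.metrics import precision_recall_fscore_support

# Hypothetical verdicts: 1 = claim is factual, 0 = claim is false (the class to detect).
gold = [1, 0, 1, 0, 0, 1]
pred = [1, 0, 1, 1, 0, 0]

# Score detection of *false* claims, so treat 0 as the positive class.
p, r, f1, _ = precision_recall_fscore_support(gold, pred, pos_label=0, average="binary")
print(f"precision={p:.2f} recall={r:.2f} F1={f1:.2f}")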
arXiv:2311.01270 [pdf, other] cs.CL; cs.CY
People Make Better Edits: Measuring the Efficacy of LLM-Generated Counterfactually Augmented Data for Harmful Language Detection
Authors: Indira Sen, Dennis Assenmacher, Mattia Samory, Isabelle Augenstein, Wil van der Aalst, Claudia Wagner
Abstract: NLP models are used in a variety of critical social computing tasks, such as detecting sexist, racist, or otherwise hateful content. Therefore, it is imperative that these models are robust to spurious features. Past work has attempted to tackle such spurious features using training data augmentation, including Counterfactually Augmented Data (CADs). CADs introduce minimal changes to existing training data points and flip their labels; training on them may reduce model dependency on spurious features. However, manually generating CADs can be time-consuming and expensive. Hence, in this work, we assess whether this task can be automated using generative NLP models. We automatically generate CADs using Polyjuice, ChatGPT, and Flan-T5, and evaluate their usefulness in improving model robustness compared to manually generated CADs. By testing both model performance on multiple out-of-domain test sets and individual data point efficacy, our results show that while manual CADs are still the most effective, CADs generated by ChatGPT come a close second. One key reason for the lower performance of automated methods is that the changes they introduce are often insufficient to flip the original label.
Submitted 25 February, 2024; v1 submitted 2 November, 2023; originally announced November 2023.
Comments: Preprint of EMNLP'23 paper
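A minimal sketch of automated CAD generation in the spirit of the paper above, assuming access to an instruction-tuned model through the openai client; the prompt wording and model name are illustrative, not the paper's exact protocol.

from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

PROMPT = (
    "Minimally edit the following text so that it is no longer {label}, "
    "changing as few words as possible:\n\n{text}"
)

def generate_cad(text: str, label: str) -> str:
    """Ask an instruction-tuned LLM for a minimal, label-flipping edit."""
    response = client.chat.completions.create(
        model="gpt-4o-mini",  # illustrative model choice
        messages=[{"role": "user", "content": PROMPT.format(label=label, text=text)}],
    )
    return response.choices[0].message.content

# The paper's key failure mode: the edit may NOT actually flip the label,
# so a classifier (or human) check on the output remains essential.
counterfactual = generate_cad("Typical woman driver, what a mess.", "sexist")
print(counterfactual)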
arXiv:2310.18343 [pdf, other] cs.CL
PHD: Pixel-Based Language Modeling of Historical Documents
Authors: Nadav Borenstein, Phillip Rust, Desmond Elliott, Isabelle Augenstein
Abstract: The digitisation of historical documents has provided historians with unprecedented research opportunities. Yet, the conventional approach to analysing historical documents involves converting them from images to text using OCR, a process that overlooks the potential benefits of treating them as images and introduces high levels of noise. To bridge this gap, we take advantage of recent advancements in pixel-based language models trained to reconstruct masked patches of pixels instead of predicting token distributions. Due to the scarcity of real historical scans, we propose a novel method for generating synthetic scans that resemble real historical documents.
We then pre-train our model, PHD, on a combination of synthetic scans and real historical newspapers from the period 1700-1900. Through our experiments, we demonstrate that PHD exhibits high proficiency in reconstructing masked image patches and provide evidence of our model's noteworthy language understanding capabilities. Notably, we successfully apply our model to a historical QA task, highlighting its usefulness in this domain.
Submitted 4 November, 2023; v1 submitted 22 October, 2023; originally announced October 2023.
Comments: Accepted to the main conference of EMNLP 2023
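The pre-training signal described above, reconstructing masked pixel patches, is easy to illustrate. Below is a minimal sketch of random patch masking on a greyscale scan tensor; the patch size and masking ratio are illustrative assumptions, not PHD's actual hyperparameters.

import torch

def mask_random_patches(scan: torch.Tensor, patch: int = 16, ratio: float = 0.25):
    """Zero out a random subset of non-overlapping square patches.

    scan: (H, W) greyscale image tensor; returns the masked image and a
    boolean grid marking which patches the model must reconstruct.
    """
    H, W = scan.shape
    gh, gw = H // patch, W // patch
    mask = torch.rand(gh, gw) < ratio          # True = patch is hidden
    masked = scan.clone()
    for i in range(gh):
        for j in range(gw):
            if mask[i, j]:
                masked[i*patch:(i+1)*patch, j*patch:(j+1)*patch] = 0.0
    return masked, mask

# Training would minimise e.g. MSE between the model's output and the
# original pixels on the masked patches only.
img = torch.rand(368, 368)
masked_img, mask = mask_random_patches(img)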
arXiv:2310.13506 [pdf, other] cs.CL; cs.AI
Explaining Interactions Between Text Spans
Authors: Sagnik Ray Choudhury, Pepa Atanasova, Isabelle Augenstein
Abstract: Reasoning over spans of tokens from different parts of the input is essential for natural language understanding (NLU) tasks such as fact-checking (FC), machine reading comprehension (MRC), and natural language inference (NLI). However, existing highlight-based explanations primarily focus on identifying individual important tokens, or on interactions only between adjacent tokens or tuples of tokens. Most notably, there is a lack of annotations capturing the human decision-making process with respect to the interactions necessary for informed decision-making in such tasks. To bridge this gap, we introduce SpanEx, a multi-annotator dataset of human span-interaction explanations for two NLU tasks: NLI and FC. We then investigate the decision-making processes of multiple fine-tuned large language models in terms of the connections they employ between spans in separate parts of the input, and compare them to human reasoning processes. Finally, we present a novel unsupervised method based on community detection to extract such interaction explanations from a model's inner workings.
Submitted 20 October, 2023; originally announced October 2023.
Comments: Code: https://github.com/copenlu/spanex ; dataset: https://huggingface.co/datasets/copenlu/spanex . Accepted to EMNLP 2023.
ACM Class: I.2.7
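Extracting span interactions via community detection can be sketched as follows: build a graph over tokens weighted by some pairwise interaction score and partition it. Here plain attention-style weights and networkx's greedy modularity routine stand in for however the paper actually derives scores and partitions the graph; both are assumptions.

import networkx as nx
from networkx.algorithms.community import greedy_modularity_communities

# Toy interaction matrix over six tokens (e.g., averaged attention weights).
tokens = ["the", "senator", "denied", "the", "claim", "entirely"]
weights = {(1, 2): 0.9, (2, 4): 0.8, (4, 5): 0.7, (0, 1): 0.6, (3, 4): 0.5}

G = nx.Graph()
G.add_nodes_from(range(len(tokens)))
for (i, j), w in weights.items():
    G.add_edge(i, j, weight=w)

# Communities of strongly interacting tokens approximate interacting spans.
for community in greedy_modularity_communities(G, weight="weight"):
    print([tokens[i] for i in sorted(community)])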
arXiv:2310.05779 [pdf, other] cs.LG; cs.CL; cs.CY
Why Should This Article Be Deleted? Transparent Stance Detection in Multilingual Wikipedia Editor Discussions
Authors: Lucie-Aimée Kaffee, Arnav Arora, Isabelle Augenstein
Abstract: The moderation of content on online platforms is usually non-transparent. On Wikipedia, however, this discussion is carried out publicly, and editors are encouraged to use the content moderation policies as explanations for making moderation decisions. Currently, only a few comments explicitly mention those policies -- 20% of the English ones, but as few as 2% of the German and Turkish comments. To aid in this process of understanding how content is moderated, we construct a novel multilingual dataset of Wikipedia editor discussions along with their reasoning in three languages. The dataset contains the stances of the editors (keep, delete, merge, comment), along with the stated reason and a content moderation policy, for each edit decision. We demonstrate that stance and the corresponding reason (policy) can be predicted jointly with a high degree of accuracy, adding transparency to the decision-making process. We release both our joint prediction models and the multilingual content moderation dataset for further research on automated transparent content moderation.
Submitted 23 October, 2023; v1 submitted 9 October, 2023; originally announced October 2023.
Comments: Accepted to the 2023 Conference on Empirical Methods in Natural Language Processing (EMNLP 2023)

arXiv:2310.05189 [pdf, ps, other] cs.CL; cs.AI; cs.LG
Factuality Challenges in the Era of Large Language Models
Authors: Isabelle Augenstein, Timothy Baldwin, Meeyoung Cha, Tanmoy Chakraborty, Giovanni Luca Ciampaglia, David Corney, Renee DiResta, Emilio Ferrara, Scott Hale, Alon Halevy, Eduard Hovy, Heng Ji, Filippo Menczer, Ruben Miguez, Preslav Nakov, Dietram Scheufele, Shivam Sharma, Giovanni Zagni
Abstract: The emergence of tools based on Large Language Models (LLMs), such as OpenAI's ChatGPT, Microsoft's Bing Chat, and Google's Bard, has garnered immense public attention.
These incredibly useful, natural-sounding tools mark significant advances in natural language generation, yet they exhibit a propensity to generate false, erroneous, or misleading content -- commonly referred to as "hallucinations." Moreover, LLMs can be exploited for malicious applications, such as generating false but credible-sounding content and profiles at scale. This poses a significant challenge to society in terms of the potential deception of users and the increasing dissemination of inaccurate information. In light of these risks, we explore the kinds of technological innovations, regulatory reforms, and AI literacy initiatives needed from fact-checkers, news organizations, and the broader research and policy communities. By identifying the risks, the imminent threats, and some viable solutions, we seek to shed light on navigating various aspects of veracity in the era of generative AI.
Submitted 9 October, 2023; v1 submitted 8 October, 2023; originally announced October 2023.
Comments: Our article offers a comprehensive examination of the challenges and risks associated with Large Language Models (LLMs), focusing on their potential impact on the veracity of information in today's digital landscape

arXiv:2306.00765 [pdf, other] cs.CL; cs.AI; cs.IR; stat.CO; stat.ML
DOI: 10.18653/v1/2023.acl-long.752
Topic-Guided Sampling For Data-Efficient Multi-Domain Stance Detection
Authors: Erik Arakelyan, Arnav Arora, Isabelle Augenstein
Abstract: Stance detection is concerned with identifying the attitudes expressed by an author towards a target of interest. This task spans a variety of domains, ranging from social media opinion identification to detecting the stance for a legal claim. However, the framing of the task varies within these domains in terms of the data collection protocol, the label dictionary, and the number of available annotations. Furthermore, these stance annotations are significantly imbalanced on a per-topic and inter-topic basis.
These factors make multi-domain stance detection a challenging task, requiring standardization and domain adaptation. To overcome this challenge, we propose Topic Efficient StancE Detection (TESTED), consisting of a topic-guided diversity sampling technique and a contrastive objective used for fine-tuning a stance classifier. We evaluate the method on an existing benchmark of 16 datasets, with both in-domain (all topics seen) and out-of-domain (unseen topics) experiments. The results show that our method outperforms the state of the art with an average increase of 3.5 F1 points in-domain, and is more generalizable, with an average increase of 10.2 F1 points on out-of-domain evaluation, while using ≤10% of the training data. We show that our sampling technique mitigates both inter- and per-topic class imbalances. Finally, our analysis demonstrates that the contrastive learning objective allows the model a more pronounced segmentation of samples with varying labels.
Submitted 1 June, 2023; originally announced June 2023.
Comments: ACL 2023 (Oral)
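The contrastive objective mentioned above can be illustrated with a generic supervised contrastive loss: embeddings of samples sharing a stance label are pulled together, others pushed apart. This is a minimal sketch of that family of losses, not TESTED's exact formulation.

import torch
import torch.nn.functional as F

def contrastive_loss(embeddings: torch.Tensor, labels: torch.Tensor, tau: float = 0.1):
    """Generic supervised contrastive loss over a batch.

    embeddings: (N, d); labels: (N,). Pairs with equal labels are positives.
    """
    z = F.normalize(embeddings, dim=1)
    sim = z @ z.T / tau                              # cosine similarities / temperature
    mask = labels.unsqueeze(0) == labels.unsqueeze(1)
    mask.fill_diagonal_(False)                       # exclude self-pairs
    logits = sim - torch.eye(len(z)) * 1e9           # remove diagonal from the softmax
    log_prob = F.log_softmax(logits, dim=1)
    pos_counts = mask.sum(1).clamp(min=1)            # anchors without positives contribute 0
    loss = -(log_prob * mask).sum(1) / pos_counts
    return loss.mean()

batch = torch.randn(8, 128)
labels = torch.tensor([0, 0, 1, 1, 2, 2, 0, 1])
print(contrastive_loss(batch, labels))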
arXiv:2305.18029 [pdf, other] cs.CL; cs.AI
Faithfulness Tests for Natural Language Explanations
Authors: Pepa Atanasova, Oana-Maria Camburu, Christina Lioma, Thomas Lukasiewicz, Jakob Grue Simonsen, Isabelle Augenstein
Abstract: Explanations of neural models aim to reveal a model's decision-making process for its predictions. However, recent work shows that current methods for producing explanations, such as saliency maps or counterfactuals, can be misleading, as they are prone to presenting reasons that are unfaithful to the model's inner workings. This work explores the challenging question of evaluating the faithfulness of natural language explanations (NLEs). To this end, we present two tests. First, we propose a counterfactual input editor for inserting reasons that lead to counterfactual predictions but are not reflected by the NLEs. Second, we reconstruct inputs from the reasons stated in the generated NLEs and check how often they lead to the same predictions. Our tests can evaluate emerging NLE models, providing a fundamental tool in the development of faithful NLEs.
Submitted 30 June, 2023; v1 submitted 29 May, 2023; originally announced May 2023.
Comments: Short paper, ACL 2023
MSC Class: 68T50; ACM Class: I.2.7
Journal ref: The 61st Annual Meeting of the Association for Computational Linguistics (ACL 2023)

arXiv:2305.12376 [pdf, other] cs.CL; cs.CY; cs.LG
Measuring Intersectional Biases in Historical Documents
Authors: Nadav Borenstein, Karolina Stańczak, Thea Rolskov, Natália da Silva Perez, Natacha Klein Käfer, Isabelle Augenstein
Abstract: Data-driven analyses of biases in historical texts can help illuminate the origin and development of biases prevailing in modern society. However, digitised historical documents pose a challenge for NLP practitioners, as these corpora suffer from errors introduced by optical character recognition (OCR) and are written in an archaic language. In this paper, we investigate the continuities and transformations of bias in historical newspapers published in the Caribbean during the colonial era (18th to 19th centuries). Our analyses are performed along the axes of gender, race, and their intersection.
We examine these biases by conducting a temporal study in which we measure the development of lexical associations using distributional semantics models and word embeddings. Further, we evaluate the effectiveness of techniques designed to process OCR-generated data and assess their stability when trained on and applied to the noisy historical newspapers. We find that there is a trade-off between the stability of the word embeddings and their compatibility with the historical dataset. We provide evidence that gender and racial biases are interdependent, and that their intersection triggers distinct effects. These findings align with the theory of intersectionality, which stresses that biases affecting people with multiple marginalised identities compound to more than the sum of their constituents.
Submitted 21 May, 2023; originally announced May 2023.
Comments: Accepted to Findings of ACL 2023
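Measuring lexical associations with word embeddings, as above, usually reduces to comparing cosine similarities between group terms and attribute terms, in the spirit of WEAT-style tests. The sketch below trains a small word2vec model per time slice and reports a simple association score; the corpus slicing and word lists are illustrative assumptions.

import numpy as np
from gensim.models import Word2Vec

def association(model, group_words, attribute_words):
    """Mean cosine similarity between group terms and attribute terms."""
    sims = [
        model.wv.similarity(g, a)
        for g in group_words for a in attribute_words
        if g in model.wv and a in model.wv
    ]
    return float(np.mean(sims)) if sims else float("nan")

# Hypothetical decade-sliced corpora: {"1770s": [[token, ...], ...], ...}
corpora = {"1770s": [["the", "woman", "worked", "obediently"]] * 200}

for decade, sentences in corpora.items():
    model = Word2Vec(sentences, vector_size=50, min_count=1, epochs=20, seed=0)
    print(decade, round(association(model, ["woman"], ["obediently"]), 3))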
arXiv:2305.10928 [pdf, other] cs.CL; cs.LG
Multilingual Event Extraction from Historical Newspaper Adverts
Authors: Nadav Borenstein, Natalia da Silva Perez, Isabelle Augenstein
Abstract: NLP methods can aid historians in analyzing textual materials in greater volumes than manually feasible. Developing such methods poses substantial challenges, though. First, acquiring large, annotated historical datasets is difficult, as only domain experts can reliably label them. Second, most available off-the-shelf NLP models are trained on modern language texts, rendering them significantly less effective when applied to historical corpora. This is particularly problematic for less well studied tasks, and for languages other than English. This paper addresses these challenges while focusing on the under-explored task of event extraction from a novel domain of historical texts. We introduce a new multilingual dataset in English, French, and Dutch composed of newspaper ads from the early modern colonial period reporting on enslaved people who liberated themselves from enslavement. We find that: 1) even with scarce annotated data, it is possible to achieve surprisingly good results by formulating the problem as an extractive QA task and leveraging existing datasets and models for modern languages; and 2) cross-lingual low-resource learning for historical languages is highly challenging, and machine translation of the historical datasets into the considered target languages is, in practice, often the best-performing solution.
Submitted 18 May, 2023; originally announced May 2023.
Comments: Accepted to the main track of ACL 2023
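Framing event extraction as extractive QA, as finding 1) above describes, means asking a reading comprehension model one question per event argument. A minimal sketch with the Hugging Face question-answering pipeline; the model name, the ad text, and the questions are all illustrative assumptions.

from transformers import pipeline

# Any extractive QA model works; this multilingual one is an arbitrary choice.
qa = pipeline("question-answering", model="deepset/xlm-roberta-base-squad2")

ad = (
    "Run away on the 3rd of June, a man named Cato, about 30 years of age, "
    "speaks French and Dutch, last seen near the harbour of Paramaribo."
)

# One question per event argument to extract.
questions = {
    "person": "Who ran away?",
    "date": "When did the person run away?",
    "location": "Where was the person last seen?",
}
for arg, question in questions.items():
    answer = qa(question=question, context=ad)
    print(arg, "->", answer["answer"])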
arXiv:2304.08315 [pdf, other] cs.CL; cs.AI
Thorny Roses: Investigating the Dual Use Dilemma in Natural Language Processing
Authors: Lucie-Aimée Kaffee, Arnav Arora, Zeerak Talat, Isabelle Augenstein
Abstract: Dual use, the intentional, harmful reuse of technology and scientific artefacts, is a problem yet to be well-defined within the context of Natural Language Processing (NLP). However, as NLP technologies continue to advance and become increasingly widespread in society, their inner workings have become increasingly opaque. Therefore, understanding dual use concerns and potential ways of limiting them is critical to minimising the potential harms of research and development. In this paper, we conduct a survey of NLP researchers and practitioners to understand the depth of the problem and their perspectives on it, as well as to assess existing available support. Based on the results of our survey, we offer a definition of dual use that is tailored to the needs of the NLP community. The survey revealed that a majority of researchers are concerned about the potential dual use of their research, but only take limited action toward it. In light of the survey results, we discuss the current state of and potential means for mitigating dual use in NLP, and propose a checklist that can be integrated into existing conference ethics frameworks, e.g., the ACL ethics checklist.
Submitted 30 October, 2023; v1 submitted 17 April, 2023; originally announced April 2023.
arXiv:2304.05783 [pdf, other] cs.CL
Measuring Gender Bias in West Slavic Language Models
Authors: Sandra Martinková, Karolina Stańczak, Isabelle Augenstein
Abstract: Pre-trained language models have been known to perpetuate biases from the underlying datasets to downstream tasks. However, these findings are predominantly based on monolingual language models for English, while few investigative studies exist of biases encoded in language models for languages beyond English. In this paper, we fill this gap by analysing gender bias in West Slavic language models. We introduce the first template-based dataset in Czech, Polish, and Slovak for measuring gender bias towards male, female, and non-binary subjects. We complete the sentences using both mono- and multilingual language models and assess their suitability for the masked language modelling objective. Next, we measure gender bias encoded in West Slavic language models by quantifying the toxicity and genderness of the generated words. We find that these language models produce hurtful completions that depend on the subject's gender. Perhaps surprisingly, Czech, Slovak, and Polish language models produce more hurtful completions with men as subjects, which, upon inspection, we find is due to completions being related to violence, death, and sickness.
Submitted 25 May, 2023; v1 submitted 12 April, 2023; originally announced April 2023.
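Template-based probing of a masked LM, as above, boils down to filling gendered templates and inspecting the completions. A minimal sketch with the fill-mask pipeline; the model choice and the English glosses of the templates are illustrative assumptions, and scoring the completions for toxicity (as the paper does) would require an additional classifier.

from transformers import pipeline

# Illustrative multilingual masked LM; the paper also probes monolingual
# West Slavic models.
fill = pipeline("fill-mask", model="xlm-roberta-base")

# Hypothetical gendered templates (English glosses of Czech/Polish/Slovak ones).
templates = [
    "All men are <mask>.",
    "All women are <mask>.",
]
for template in templates:
    completions = [out["token_str"].strip() for out in fill(template, top_k=5)]
    print(template, "->", completions)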
arXiv:2302.02500 [pdf, other] cs.CL
TempEL: Linking Dynamically Evolving and Newly Emerging Entities
Authors: Klim Zaporojets, Lucie-Aimee Kaffee, Johannes Deleu, Thomas Demeester, Chris Develder, Isabelle Augenstein
Abstract: In our continuously evolving world, entities change over time and new, previously non-existing or unknown, entities appear. We study how this evolutionary scenario impacts performance on a well-established entity linking (EL) task. For this study, we introduce TempEL, an entity linking dataset consisting of time-stratified English Wikipedia snapshots from 2013 to 2022, from which we collect both anchor mentions of entities and these target entities' descriptions. By capturing such temporal aspects, our newly introduced TempEL resource contrasts with existing entity linking datasets, which are composed of fixed mentions linked to a single static version of a target Knowledge Base (e.g., Wikipedia 2010 for CoNLL-AIDA). Indeed, for each of our collected temporal snapshots, TempEL contains links to entities that are continual, i.e., occur in all of the years, as well as completely new entities that appear for the first time at some point. This makes it possible to quantify the performance of current state-of-the-art EL models for: (i) entities that are subject to changes over time in their Knowledge Base descriptions as well as in their mentions' contexts, and (ii) newly created entities that were previously non-existing (e.g., at the time the EL model was trained).
Our experimental results show that, in terms of temporal performance degradation, (i) continual entities suffer a decrease of up to 3.1% EL accuracy, while (ii) for new entities the accuracy drop is up to 17.9%. This highlights the challenge posed by the introduced TempEL dataset and opens new research prospects in the area of time-evolving entity disambiguation.
Submitted 5 February, 2023; originally announced February 2023.

arXiv:2301.12313 [pdf, other] cs.LG; cs.AI; cs.LO; cs.NE
Adapting Neural Link Predictors for Data-Efficient Complex Query Answering
Authors: Erik Arakelyan, Pasquale Minervini, Daniel Daza, Michael Cochez, Isabelle Augenstein
Abstract: Answering complex queries on incomplete knowledge graphs is a challenging task where a model needs to answer complex logical queries in the presence of missing knowledge.
Prior work in the literature has proposed to address this problem by designing architectures trained end-to-end for the complex query answering task, with a reasoning process that is hard to interpret and that requires data- and resource-intensive training. Other lines of research have proposed re-using simple neural link predictors to answer complex queries, reducing the amount of training data by orders of magnitude while providing interpretable answers. The neural link predictor used in such approaches is not explicitly optimised for the complex query answering task, implying that its scores are not calibrated to interact together. We propose to address these problems via CQD^A, a parameter-efficient score adaptation model optimised to re-calibrate neural link prediction scores for the complex query answering task. While the neural link predictor is frozen, the adaptation component -- which only increases the number of model parameters by 0.03% -- is trained on the downstream complex query answering task. Furthermore, the calibration component enables us to support reasoning over queries that include atomic negations, which was previously impossible with link predictors. In our experiments, CQD^A produces significantly more accurate results than current state-of-the-art methods, improving from 34.4 to 35.1 Mean Reciprocal Rank averaged across all datasets and query types, while using ≤30% of the available training query types. We further show that CQD^A is data-efficient, achieving competitive results with only 1% of the training complex queries, and robust in out-of-domain evaluations.
Submitted 11 July, 2023; v1 submitted 28 January, 2023; originally announced January 2023.
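The core idea above, a frozen link predictor whose atom scores pass through a tiny trained calibration before being combined with fuzzy-logic operators, can be sketched as follows. The affine-plus-sigmoid calibration, the DistMult-style stand-in predictor, and the Gödel t-norm are illustrative choices consistent with the abstract's description, not CQD^A's exact parameterisation.

import torch
import torch.nn as nn

class DummyPredictor(nn.Module):
    """Stand-in DistMult-style link predictor over (head, relation, tail) indices."""
    def __init__(self, n_entities=100, n_relations=10, dim=16):
        super().__init__()
        self.ent = nn.Embedding(n_entities, dim)
        self.rel = nn.Embedding(n_relations, dim)
    def forward(self, h, r, t):
        return (self.ent(h) * self.rel(r) * self.ent(t)).sum(-1)

class CalibratedAtomScorer(nn.Module):
    """Frozen link predictor plus a tiny trained calibration (the adapted part)."""
    def __init__(self, link_predictor):
        super().__init__()
        self.link_predictor = link_predictor
        for p in self.link_predictor.parameters():
            p.requires_grad = False               # predictor stays frozen
        self.alpha = nn.Parameter(torch.ones(1))  # the only trained parameters
        self.beta = nn.Parameter(torch.zeros(1))
    def forward(self, h, r, t):
        s = self.link_predictor(h, r, t)
        return torch.sigmoid(self.alpha * s + self.beta)  # calibrated score in (0, 1)

# Fuzzy-logic operators combine calibrated atom scores into complex-query scores.
def t_norm(a, b): return torch.minimum(a, b)  # conjunction (Gödel t-norm)
def negate(a):    return 1.0 - a              # atomic negation, enabled by calibration

scorer = CalibratedAtomScorer(DummyPredictor())
h, r1, r2, t = (torch.tensor([i]) for i in (0, 1, 2, 3))
# Score of the query r1(h, t) AND NOT r2(h, t):
print(t_norm(scorer(h, r1, t), negate(scorer(h, r2, t))))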
arXiv:2212.09409 [pdf, other] cs.CL
Multi-View Knowledge Distillation from Crowd Annotations for Out-of-Domain Generalization
Authors: Dustin Wright, Isabelle Augenstein
Abstract: Selecting an effective training signal for tasks in natural language processing is difficult: expert annotations are expensive, and crowd-sourced annotations may not be reliable. At the same time, recent work in NLP has demonstrated that learning from a distribution over labels acquired from crowd annotations can be effective. However, there are many ways to acquire such a distribution, and the performance of any one method can fluctuate based on the task and the amount of available crowd annotations, making it difficult to know a priori which distribution is best. This paper systematically analyzes this in the out-of-domain setting, adding to the NLP literature, which has focused on in-domain evaluation, and proposes new methods for acquiring soft labels from crowd annotations by aggregating the distributions produced by existing methods. In particular, we propose to aggregate multiple views of crowd annotations via temperature scaling and finding their Jensen-Shannon centroid. We demonstrate that these aggregation methods lead to the most consistent performance across four NLP tasks on out-of-domain test sets, mitigating fluctuations in performance from the individual distributions. Additionally, aggregation results in the most consistently well-calibrated uncertainty estimation. We argue that aggregating different views of crowd annotations is an effective and minimal intervention to acquire soft labels which induce robust classifiers despite the inconsistency of the individual soft-labeling methods.
Submitted 23 May, 2023; v1 submitted 19 December, 2022; originally announced December 2022.
Comments: 14 pages, 4 figures, 1 table

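As a rough illustration of the aggregation step, the sketch below temperature-scales two soft-label views and finds their Jensen-Shannon centroid numerically. The optimisation recipe (a softmax-parametrised Nelder-Mead search) and the toy distributions are assumptions for illustration, not the paper's implementation.

```python
import numpy as np
from scipy.optimize import minimize
from scipy.spatial.distance import jensenshannon

def temperature_scale(p, T):
    """Sharpen (T < 1) or flatten (T > 1) a distribution, then renormalise."""
    q = np.asarray(p) ** (1.0 / T)
    return q / q.sum()

def js_centroid(views):
    """Distribution minimising the summed JS divergence to all views,
    found numerically via a softmax parametrisation of the simplex."""
    def objective(z):
        c = np.exp(z - z.max())
        c /= c.sum()
        # jensenshannon returns the distance; squaring gives the divergence.
        return sum(jensenshannon(c, v) ** 2 for v in views)
    res = minimize(objective, np.zeros(len(views[0])), method="Nelder-Mead")
    c = np.exp(res.x - res.x.max())
    return c / c.sum()

# Two soft-label views of the same item from different acquisition methods.
views = [temperature_scale([0.7, 0.2, 0.1], T=2.0),
         temperature_scale([0.5, 0.4, 0.1], T=2.0)]
print(js_centroid(views))  # aggregated soft label
```
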
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 4 figures, 1 table</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.14037">arXiv:2210.14037</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2210.14037">pdf</a>, <a href="https://arxiv.org/format/2210.14037">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Revisiting Softmax for Uncertainty Approximation in Text Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Holm%2C+A+N">Andreas Nugaard Holm</a>, <a href="/search/?searchtype=author&amp;query=Wright%2C+D">Dustin Wright</a>, <a href="/search/?searchtype=author&amp;query=Augenstein%2C+I">Isabelle Augenstein</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.14037v2-abstract-short" style="display: inline;"> Uncertainty approximation in text classification is an important area with applications in domain adaptation and interpretability. One of the most widely used uncertainty approximation methods is Monte Carlo (MC) Dropout, which is computationally expensive as it requires multiple forward passes through the model. A cheaper alternative is to simply use the softmax based on a single forward pass wit&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.14037v2-abstract-full').style.display = 'inline'; document.getElementById('2210.14037v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.14037v2-abstract-full" style="display: none;"> Uncertainty approximation in text classification is an important area with applications in domain adaptation and interpretability. One of the most widely used uncertainty approximation methods is Monte Carlo (MC) Dropout, which is computationally expensive as it requires multiple forward passes through the model. A cheaper alternative is to simply use the softmax based on a single forward pass without dropout to estimate model uncertainty. However, prior work has indicated that these predictions tend to be overconfident. In this paper, we perform a thorough empirical analysis of these methods on five datasets with two base neural architectures in order to identify the trade-offs between the two. We compare both softmax and an efficient version of MC Dropout on their uncertainty approximations and downstream text classification performance, while weighing their runtime (cost) against performance (benefit). We find that, while MC dropout produces the best uncertainty approximations, using a simple softmax leads to competitive and in some cases better uncertainty estimation for text classification at a much lower computational cost, suggesting that softmax can in fact be a sufficient uncertainty estimate when computational resources are a concern. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.14037v2-abstract-full').style.display = 'none'; document.getElementById('2210.14037v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.13001">arXiv:2210.13001</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2210.13001">pdf</a>, <a href="https://arxiv.org/format/2210.13001">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Modeling Information Change in Science Communication with Semantically Matched Paraphrases </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Wright%2C+D">Dustin Wright</a>, <a href="/search/?searchtype=author&amp;query=Pei%2C+J">Jiaxin Pei</a>, <a href="/search/?searchtype=author&amp;query=Jurgens%2C+D">David Jurgens</a>, <a href="/search/?searchtype=author&amp;query=Augenstein%2C+I">Isabelle Augenstein</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.13001v1-abstract-short" style="display: inline;"> Whether the media faithfully communicate scientific information has long been a core issue to the science community. Automatically identifying paraphrased scientific findings could enable large-scale tracking and analysis of information changes in the science communication process, but this requires systems to understand the similarity between scientific information across multiple domains. To thi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.13001v1-abstract-full').style.display = 'inline'; document.getElementById('2210.13001v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.13001v1-abstract-full" style="display: none;"> Whether the media faithfully communicate scientific information has long been a core issue to the science community. Automatically identifying paraphrased scientific findings could enable large-scale tracking and analysis of information changes in the science communication process, but this requires systems to understand the similarity between scientific information across multiple domains. To this end, we present the SCIENTIFIC PARAPHRASE AND INFORMATION CHANGE DATASET (SPICED), the first paraphrase dataset of scientific findings annotated for degree of information change. SPICED contains 6,000 scientific finding pairs extracted from news stories, social media discussions, and full texts of original papers. 
arXiv:2209.07430 [pdf, other] cs.CL
Machine Reading, Fast and Slow: When Do Models "Understand" Language?
Authors: Sagnik Ray Choudhury, Anna Rogers, Isabelle Augenstein
Abstract: Two of the most fundamental challenges in Natural Language Understanding (NLU) at present are: (a) how to establish whether deep learning-based models score highly on NLU benchmarks for the 'right' reasons; and (b) understanding what those reasons would even be. We investigate the behavior of reading comprehension models with respect to two linguistic 'skills': coreference resolution and comparison. We propose a definition for the reasoning steps expected from a system that would be 'reading slowly', and compare that with the behavior of five models of the BERT family of various sizes, observed through saliency scores and counterfactual explanations. We find that for comparison (but not coreference) the systems based on larger encoders are more likely to rely on the 'right' information, but even they struggle with generalization, suggesting that they still learn specific lexical patterns rather than the general principles of comparison.
Submitted 15 September, 2022; originally announced September 2022.
Comments: Accepted at COLING 2022

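Saliency scores of the kind used in this analysis can be sketched as input gradients. The recipe below (gradient L2 norm per token, on a Hugging Face classifier that accepts inputs_embeds) is one standard attribution method, not necessarily the paper's exact setup:

```python
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

def gradient_saliency(model, embeddings_layer, input_ids, target_label):
    """Per-token saliency = L2 norm of the gradient of the target logit
    with respect to the input embeddings."""
    embeds = embeddings_layer(input_ids).detach().requires_grad_(True)
    logits = model(inputs_embeds=embeds).logits
    logits[0, target_label].backward()
    return embeds.grad.norm(dim=-1).squeeze(0)  # one score per token

tok = AutoTokenizer.from_pretrained("distilbert-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased")
ids = tok("Mary is taller than John.", return_tensors="pt").input_ids
sal = gradient_saliency(model, model.get_input_embeddings(), ids, target_label=0)
print(list(zip(tok.convert_ids_to_tokens(ids[0]), sal.tolist())))
```
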
arXiv:2205.04238 [pdf, other] cs.CL
Counterfactually Augmented Data and Unintended Bias: The Case of Sexism and Hate Speech Detection
Authors: Indira Sen, Mattia Samory, Claudia Wagner, Isabelle Augenstein
Abstract: Counterfactually Augmented Data (CAD) aims to improve out-of-domain generalizability, an indicator of model robustness. The improvement is credited with promoting core features of the construct over spurious artifacts that happen to correlate with it. Yet, over-relying on core features may lead to unintended model bias. In particular, construct-driven CAD (perturbations of core features) may induce models to ignore the context in which core features are used. Here, we test models for sexism and hate speech detection on challenging data: non-hateful and non-sexist usage of identity and gendered terms. On these hard cases, models trained on CAD, especially construct-driven CAD, show higher false-positive rates than models trained on the original, unperturbed data. Using a diverse set of CAD, both construct-driven and construct-agnostic, reduces such unintended bias.
Submitted 9 May, 2022; originally announced May 2022.
Comments: Accepted to NAACL'22 as a short paper

arXiv:2205.02023 [pdf, other] cs.CL
Same Neurons, Different Languages: Probing Morphosyntax in Multilingual Pre-trained Models
Authors: Karolina Stańczak, Edoardo Ponti, Lucas Torroba Hennigen, Ryan Cotterell, Isabelle Augenstein
Abstract: The success of multilingual pre-trained models is underpinned by their ability to learn representations shared by multiple languages even in the absence of any explicit supervision. However, it remains unclear how these models learn to generalise across languages. In this work, we conjecture that multilingual pre-trained models can derive language-universal abstractions about grammar. In particular, we investigate whether morphosyntactic information is encoded in the same subset of neurons in different languages. We conduct the first large-scale empirical study over 43 languages and 14 morphosyntactic categories with a state-of-the-art neuron-level probe. Our findings show that the cross-lingual overlap between neurons is significant, but that its extent may vary across categories and depends on language proximity and pre-training data size.
Submitted 8 May, 2022; v1 submitted 4 May, 2022; originally announced May 2022.
Comments: Accepted at NAACL 2022 (Main Conference)

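The overlap finding suggests a simple statistic: given the per-neuron importance scores a probe assigns for one morphosyntactic category in two languages, compare the top-k neuron sets. The value of k and the toy scores below are illustrative assumptions:

```python
import numpy as np

def topk_neuron_overlap(importance_a, importance_b, k=50):
    """Fraction of neurons shared between the top-k most probe-relevant
    neurons for the same category in two languages."""
    top_a = set(np.argsort(importance_a)[-k:])
    top_b = set(np.argsort(importance_b)[-k:])
    return len(top_a & top_b) / k

rng = np.random.default_rng(0)
imp_en, imp_de = rng.random(768), rng.random(768)  # toy importances
print(topk_neuron_overlap(imp_en, imp_de))  # ~k/768 expected under independence
```

Comparing the observed overlap against the independence baseline in the final comment is one way to judge whether the sharing is more than chance.
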
arXiv:2204.02007 [pdf, other] cs.CL cs.LG
Fact Checking with Insufficient Evidence
Authors: Pepa Atanasova, Jakob Grue Simonsen, Christina Lioma, Isabelle Augenstein
Abstract: Automating the fact checking (FC) process relies on information obtained from external sources. In this work, we posit that it is crucial for FC models to make veracity predictions only when there is sufficient evidence, and to indicate otherwise when there is not. To this end, we are the first to study what information FC models consider sufficient, introducing a novel task and advancing it with three main contributions. First, we conduct an in-depth empirical analysis of the task with a new fluency-preserving method for omitting information from the evidence at the constituent and sentence level. We identify when models consider the remaining evidence (in)sufficient for FC, based on three trained models with different Transformer architectures and three FC datasets. Second, we ask annotators whether the omitted evidence was important for FC, resulting in a novel diagnostic dataset, SufficientFacts, for FC with omitted evidence. We find that models are least successful in detecting missing evidence when adverbial modifiers are omitted (21% accuracy), whereas it is easiest for omitted date modifiers (63% accuracy). Finally, we propose a novel data augmentation strategy for contrastive self-learning of missing evidence that combines the proposed omission method with tri-training. It improves performance for Evidence Sufficiency Prediction by up to 17.8 F1 points, which in turn improves FC performance by up to 2.6 F1 points.
Submitted 5 April, 2022; originally announced April 2022.
Comments: 14 pages

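The sentence-level variant of the omission probe is straightforward to sketch; the constituent-level variant additionally requires a parser and the paper's fluency-preserving edits:

```python
def sentence_omission_variants(evidence):
    """Yield copies of the evidence with one sentence left out at a time;
    feeding each variant to a fact checker reveals which sentences the
    model treats as necessary for a veracity call."""
    for i in range(len(evidence)):
        yield evidence[:i] + evidence[i + 1:]

evidence = ["The film premiered in 2010.",
            "It was directed by J. Doe.",
            "It grossed $100m worldwide."]
for variant in sentence_omission_variants(evidence):
    print(variant)
```
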
arXiv:2203.13722 [pdf, other] cs.CL
Probing Pre-Trained Language Models for Cross-Cultural Differences in Values
Authors: Arnav Arora, Lucie-Aimée Kaffee, Isabelle Augenstein
Abstract: Language embeds information about social, cultural, and political values people hold. Prior work has explored social and potentially harmful biases encoded in Pre-Trained Language Models (PTLMs). However, there has been no systematic study investigating how values embedded in these models vary across cultures. In this paper, we introduce probes to study which values across cultures are embedded in these models, and whether they align with existing theories and cross-cultural value surveys. We find that PTLMs capture differences in values across cultures, but that those only weakly align with established value surveys. We discuss the implications of using mis-aligned models in cross-cultural settings, as well as ways of aligning PTLMs with value surveys.
Submitted 6 April, 2023; v1 submitted 25 March, 2022; originally announced March 2022.

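A minimal illustration of this style of probing with a masked LM follows; the template, the target words, and the model checkpoint are placeholders, whereas the paper derives its probes from established cross-cultural value surveys:

```python
from transformers import pipeline

fill = pipeline("fill-mask", model="bert-base-multilingual-cased")
# Compare how strongly the model prefers opposite completions of a
# value statement; relative scores hint at the encoded stance.
for pred in fill("Divorce is [MASK] justifiable.", targets=["always", "never"]):
    print(pred["token_str"], round(pred["score"], 4))
```
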
arXiv:2203.12990 [pdf, other] cs.CL
Generating Scientific Claims for Zero-Shot Scientific Fact Checking
Authors: Dustin Wright, David Wadden, Kyle Lo, Bailey Kuehl, Arman Cohan, Isabelle Augenstein, Lucy Lu Wang
Abstract: Automated scientific fact checking is difficult due to the complexity of scientific language and a lack of significant amounts of training data, as annotation requires domain expertise. To address this challenge, we propose scientific claim generation, the task of generating one or more atomic and verifiable claims from scientific sentences, and demonstrate its usefulness in zero-shot fact checking for biomedical claims. We propose CLAIMGEN-BART, a new supervised method for generating claims supported by the literature, as well as KBIN, a novel method for generating claim negations. Additionally, we adapt an existing unsupervised entity-centric method of claim generation to biomedical claims, which we call CLAIMGEN-ENTITY. Experiments on zero-shot fact checking demonstrate that both CLAIMGEN-ENTITY and CLAIMGEN-BART, coupled with KBIN, achieve up to 90% of the performance of fully supervised models trained on manually annotated claims and evidence. A rigorous evaluation study demonstrates significant improvement in generated claim and negation quality over existing baselines.
Submitted 24 March, 2022; originally announced March 2022.
Comments: Accepted to ACL 2022; 13 pages, 3 figures, 8 tables

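The claim-generation interface can be sketched with any off-the-shelf seq2seq model. The checkpoint below is a generic summarization BART, not the paper's CLAIMGEN-BART, so the output only illustrates the shape of the task: compressing a scientific sentence toward an atomic, checkable statement.

```python
from transformers import pipeline

generate = pipeline("summarization", model="facebook/bart-large-cnn")
sentence = ("We observed a 23% reduction in symptom severity among patients "
            "receiving the treatment compared with placebo.")
# A fine-tuned claim generator would emit claims such as
# "The treatment reduces symptom severity."
print(generate(sentence, min_length=5, max_length=30)[0]["summary_text"])
```
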
arXiv:2202.06671 [pdf, other] cs.CL
Neighborhood Contrastive Learning for Scientific Document Representations with Citation Embeddings
Authors: Malte Ostendorff, Nils Rethmeier, Isabelle Augenstein, Bela Gipp, Georg Rehm
Abstract: Learning scientific document representations can be substantially improved through contrastive learning objectives, where the challenge lies in creating positive and negative training samples that encode the desired similarity semantics. Prior work relies on discrete citation relations to generate contrast samples. However, discrete citations enforce a hard cut-off on similarity. This is counter-intuitive for similarity-based learning and ignores that scientific papers can be very similar despite lacking a direct citation, which is a core problem in finding related research. Instead, we use controlled nearest neighbor sampling over citation graph embeddings for contrastive learning. This control allows us to learn continuous similarity, to sample hard-to-learn negatives and positives, and to avoid collisions between negative and positive samples by controlling the sampling margin between them. The resulting method, SciNCL, outperforms the state of the art on the SciDocs benchmark. Furthermore, we demonstrate that it can train (or tune) models sample-efficiently and that it can be combined with recent training-efficient methods. Perhaps surprisingly, even training a general-domain language model this way outperforms baselines pre-trained in-domain.
Submitted 19 October, 2022; v1 submitted 14 February, 2022; originally announced February 2022.
Comments: Accepted to EMNLP 2022

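The controlled sampling can be pictured as banded k-nearest-neighbour selection over citation-graph embeddings: positives from a near band, hard negatives from a farther band, with the untouched gap between bands acting as the margin. The band positions below are invented for illustration:

```python
import numpy as np

def sample_contrast_pair(emb, anchor, pos_band=(1, 6), neg_band=(20, 30), rng=None):
    """Positives come from a band of near neighbours, hard negatives from a
    farther band; the gap between bands keeps the two from colliding."""
    rng = rng or np.random.default_rng()
    dists = np.linalg.norm(emb - emb[anchor], axis=1)
    order = np.argsort(dists)  # order[0] is the anchor itself
    pos = rng.choice(order[pos_band[0]:pos_band[1]])
    neg = rng.choice(order[neg_band[0]:neg_band[1]])
    return pos, neg

emb = np.random.default_rng(0).normal(size=(1000, 64))  # stand-in embeddings
print(sample_contrast_pair(emb, anchor=0))
```

Moving the negative band closer to the anchor yields harder negatives, which is exactly the knob the controlled-sampling formulation exposes.
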
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ACL 2022; 13 pages, 3 figures, 8 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.06671">arXiv:2202.06671</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2202.06671">pdf</a>, <a href="https://arxiv.org/format/2202.06671">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Neighborhood Contrastive Learning for Scientific Document Representations with Citation Embeddings </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Ostendorff%2C+M">Malte Ostendorff</a>, <a href="/search/?searchtype=author&amp;query=Rethmeier%2C+N">Nils Rethmeier</a>, <a href="/search/?searchtype=author&amp;query=Augenstein%2C+I">Isabelle Augenstein</a>, <a href="/search/?searchtype=author&amp;query=Gipp%2C+B">Bela Gipp</a>, <a href="/search/?searchtype=author&amp;query=Rehm%2C+G">Georg Rehm</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.06671v2-abstract-short" style="display: inline;"> Learning scientific document representations can be substantially improved through contrastive learning objectives, where the challenge lies in creating positive and negative training samples that encode the desired similarity semantics. Prior work relies on discrete citation relations to generate contrast samples. However, discrete citations enforce a hard cut-off to similarity. This is counter-i&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.06671v2-abstract-full').style.display = 'inline'; document.getElementById('2202.06671v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.06671v2-abstract-full" style="display: none;"> Learning scientific document representations can be substantially improved through contrastive learning objectives, where the challenge lies in creating positive and negative training samples that encode the desired similarity semantics. Prior work relies on discrete citation relations to generate contrast samples. However, discrete citations enforce a hard cut-off to similarity. This is counter-intuitive to similarity-based learning, and ignores that scientific papers can be very similar despite lacking a direct citation - a core problem of finding related research. Instead, we use controlled nearest neighbor sampling over citation graph embeddings for contrastive learning. This control allows us to learn continuous similarity, to sample hard-to-learn negatives and positives, and also to avoid collisions between negative and positive samples by controlling the sampling margin between them. The resulting method SciNCL outperforms the state-of-the-art on the SciDocs benchmark. Furthermore, we demonstrate that it can train (or tune) models sample-efficiently, and that it can be combined with recent training-efficient methods. Perhaps surprisingly, even training a general-domain language model this way outperforms baselines pretrained in-domain. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.06671v2-abstract-full').style.display = 'none'; document.getElementById('2202.06671v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to EMNLP 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2201.08214">arXiv:2201.08214</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2201.08214">pdf</a>, <a href="https://arxiv.org/format/2201.08214">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> A Latent-Variable Model for Intrinsic Probing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&amp;query=Sta%C5%84czak%2C+K">Karolina Sta艅czak</a>, <a href="/search/?searchtype=author&amp;query=Hennigen%2C+L+T">Lucas Torroba Hennigen</a>, <a href="/search/?searchtype=author&amp;query=Williams%2C+A">Adina Williams</a>, <a href="/search/?searchtype=author&amp;query=Cotterell%2C+R">Ryan Cotterell</a>, <a href="/search/?searchtype=author&amp;query=Augenstein%2C+I">Isabelle Augenstein</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2201.08214v3-abstract-short" style="display: inline;"> The success of pre-trained contextualized representations has prompted researchers to analyze them for the presence of linguistic information. Indeed, it is natural to assume that these pre-trained representations do encode some level of linguistic knowledge as they have brought about large empirical improvements on a wide variety of NLP tasks, which suggests they are learning true linguistic gene&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.08214v3-abstract-full').style.display = 'inline'; document.getElementById('2201.08214v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2201.08214v3-abstract-full" style="display: none;"> The success of pre-trained contextualized representations has prompted researchers to analyze them for the presence of linguistic information. Indeed, it is natural to assume that these pre-trained representations do encode some level of linguistic knowledge as they have brought about large empirical improvements on a wide variety of NLP tasks, which suggests they are learning true linguistic generalization. In this work, we focus on intrinsic probing, an analysis technique where the goal is not only to identify whether a representation encodes a linguistic attribute but also to pinpoint where this attribute is encoded. We propose a novel latent-variable formulation for constructing intrinsic probes and derive a tractable variational approximation to the log-likelihood. 
Our results show that our model is versatile and yields tighter mutual information estimates than two intrinsic probes previously proposed in the literature. Finally, we find empirical evidence that pre-trained representations develop a cross-lingually entangled notion of morphosyntax. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.08214v3-abstract-full').style.display = 'none'; document.getElementById('2201.08214v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 January, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2022. </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Augenstein%2C+I&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Augenstein%2C+I&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Augenstein%2C+I&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Augenstein%2C+I&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> 
<!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>
