Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 70 results for author: <span class="mathjax">Cohen, S B</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Cohen%2C+S+B">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Cohen, S B"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Cohen%2C+S+B&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Cohen, S B"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Cohen%2C+S+B&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Cohen%2C+S+B&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Cohen%2C+S+B&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12064">arXiv:2411.12064</a> <span> [<a href="https://arxiv.org/pdf/2411.12064">pdf</a>, <a href="https://arxiv.org/format/2411.12064">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> TSPRank: Bridging Pairwise and Listwise Methods with a Bilinear Travelling Salesman Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+W+W">Weixian Waylon Li</a>, <a href="/search/cs?searchtype=author&query=Ziser%2C+Y">Yftah Ziser</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+Y">Yifei Xie</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+T">Tiejun Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12064v1-abstract-short" style="display: inline;"> Traditional Learning-To-Rank (LETOR) approaches, including pairwise methods like RankNet and LambdaMART, often fall short by solely focusing on pairwise comparisons, leading to sub-optimal global rankings. Conversely, deep learning based listwise methods, while aiming to optimise entire lists, require complex tuning and yield only marginal improvements over robust pairwise models. To overcome thes… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12064v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12064v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12064v1-abstract-full" style="display: none;"> Traditional Learning-To-Rank (LETOR) approaches, including pairwise methods like RankNet and LambdaMART, often fall short by solely focusing on pairwise comparisons, leading to sub-optimal global rankings. Conversely, deep learning based listwise methods, while aiming to optimise entire lists, require complex tuning and yield only marginal improvements over robust pairwise models. To overcome these limitations, we introduce Travelling Salesman Problem Rank (TSPRank), a hybrid pairwise-listwise ranking method. TSPRank reframes the ranking problem as a Travelling Salesman Problem (TSP), a well-known combinatorial optimisation challenge that has been extensively studied for its numerous solution algorithms and applications. This approach enables the modelling of pairwise relationships and leverages combinatorial optimisation to determine the listwise ranking. This approach can be directly integrated as an additional component into embeddings generated by existing backbone models to enhance ranking performance. Our extensive experiments across three backbone models on diverse tasks, including stock ranking, information retrieval, and historical events ordering, demonstrate that TSPRank significantly outperforms both pure pairwise and listwise methods. Our qualitative analysis reveals that TSPRank's main advantage over existing methods is its ability to harness global information better while ranking. TSPRank's robustness and superior performance across different domains highlight its potential as a versatile and effective LETOR solution. The code and preprocessed data are available at https://github.com/waylonli/TSPRank-KDD2025. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12064v1-abstract-full').style.display = 'none'; document.getElementById('2411.12064v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ACM SIGKDD 2025 Research Track</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.20008">arXiv:2410.20008</a> <span> [<a href="https://arxiv.org/pdf/2410.20008">pdf</a>, <a href="https://arxiv.org/format/2410.20008">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Layer by Layer: Uncovering Where Multi-Task Learning Happens in Instruction-Tuned Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+Z">Zheng Zhao</a>, <a href="/search/cs?searchtype=author&query=Ziser%2C+Y">Yftah Ziser</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.20008v1-abstract-short" style="display: inline;"> Fine-tuning pre-trained large language models (LLMs) on a diverse array of tasks has become a common approach for building models that can solve various natural language processing (NLP) tasks. However, where and to what extent these models retain task-specific knowledge remains largely unexplored. This study investigates the task-specific information encoded in pre-trained LLMs and the effects of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20008v1-abstract-full').style.display = 'inline'; document.getElementById('2410.20008v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.20008v1-abstract-full" style="display: none;"> Fine-tuning pre-trained large language models (LLMs) on a diverse array of tasks has become a common approach for building models that can solve various natural language processing (NLP) tasks. However, where and to what extent these models retain task-specific knowledge remains largely unexplored. This study investigates the task-specific information encoded in pre-trained LLMs and the effects of instruction tuning on their representations across a diverse set of over 60 NLP tasks. We use a set of matrix analysis tools to examine the differences between the way pre-trained and instruction-tuned LLMs store task-specific information. Our findings reveal that while some tasks are already encoded within the pre-trained LLMs, others greatly benefit from instruction tuning. Additionally, we pinpointed the layers in which the model transitions from high-level general representations to more task-oriented representations. This finding extends our understanding of the governing mechanisms of LLMs and facilitates future research in the fields of parameter-efficient transfer learning and multi-task learning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20008v1-abstract-full').style.display = 'none'; document.getElementById('2410.20008v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to EMNLP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.10614">arXiv:2410.10614</a> <span> [<a href="https://arxiv.org/pdf/2410.10614">pdf</a>, <a href="https://arxiv.org/format/2410.10614">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Finance">q-fin.CP</span> </div> </div> <p class="title is-5 mathjax"> Modeling News Interactions and Influence for Financial Market Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+M">Mengyu Wang</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+T">Tiejun Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.10614v1-abstract-short" style="display: inline;"> The diffusion of financial news into market prices is a complex process, making it challenging to evaluate the connections between news events and market movements. This paper introduces FININ (Financial Interconnected News Influence Network), a novel market prediction model that captures not only the links between news and prices but also the interactions among news items themselves. FININ effect… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10614v1-abstract-full').style.display = 'inline'; document.getElementById('2410.10614v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.10614v1-abstract-full" style="display: none;"> The diffusion of financial news into market prices is a complex process, making it challenging to evaluate the connections between news events and market movements. This paper introduces FININ (Financial Interconnected News Influence Network), a novel market prediction model that captures not only the links between news and prices but also the interactions among news items themselves. FININ effectively integrates multi-modal information from both market data and news articles. We conduct extensive experiments on two datasets, encompassing the S&P 500 and NASDAQ 100 indices over a 15-year period and over 2.7 million news articles. The results demonstrate FININ's effectiveness, outperforming advanced market prediction models with an improvement of 0.429 and 0.341 in the daily Sharpe ratio for the two markets respectively. Moreover, our results reveal insights into the financial news, including the delayed market pricing of news, the long memory effect of news, and the limitations of financial sentiment analysis in fully extracting predictive power from news data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10614v1-abstract-full').style.display = 'none'; document.getElementById('2410.10614v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by EMNLP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.10336">arXiv:2410.10336</a> <span> [<a href="https://arxiv.org/pdf/2410.10336">pdf</a>, <a href="https://arxiv.org/format/2410.10336">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Symbolic Computation">cs.SC</span> </div> </div> <p class="title is-5 mathjax"> CoMAT: Chain of Mathematically Annotated Thought Improves Mathematical Reasoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Leang%2C+J+O+J">Joshua Ong Jun Leang</a>, <a href="/search/cs?searchtype=author&query=Gema%2C+A+P">Aryo Pradipta Gema</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.10336v1-abstract-short" style="display: inline;"> Mathematical reasoning remains a significant challenge for large language models (LLMs), despite progress in prompting techniques such as Chain-of-Thought (CoT). We present Chain of Mathematically Annotated Thought (CoMAT), which enhances reasoning through two stages: Symbolic Conversion (converting natural language queries into symbolic form) and Reasoning Execution (deriving answers from symboli… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10336v1-abstract-full').style.display = 'inline'; document.getElementById('2410.10336v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.10336v1-abstract-full" style="display: none;"> Mathematical reasoning remains a significant challenge for large language models (LLMs), despite progress in prompting techniques such as Chain-of-Thought (CoT). We present Chain of Mathematically Annotated Thought (CoMAT), which enhances reasoning through two stages: Symbolic Conversion (converting natural language queries into symbolic form) and Reasoning Execution (deriving answers from symbolic representations). CoMAT operates entirely with a single LLM and without external solvers. Across four LLMs, CoMAT outperforms traditional CoT on six out of seven benchmarks, achieving gains of 4.48% on MMLU-Redux (MATH) and 4.58% on GaoKao MCQ. In addition to improved performance, CoMAT ensures faithfulness and verifiability, offering a transparent reasoning process for complex mathematical tasks <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10336v1-abstract-full').style.display = 'none'; document.getElementById('2410.10336v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 12 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.08811">arXiv:2410.08811</a> <span> [<a href="https://arxiv.org/pdf/2410.08811">pdf</a>, <a href="https://arxiv.org/format/2410.08811">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> PoisonBench: Assessing Large Language Model Vulnerability to Data Poisoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fu%2C+T">Tingchen Fu</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+M">Mrinank Sharma</a>, <a href="/search/cs?searchtype=author&query=Torr%2C+P">Philip Torr</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a>, <a href="/search/cs?searchtype=author&query=Krueger%2C+D">David Krueger</a>, <a href="/search/cs?searchtype=author&query=Barez%2C+F">Fazl Barez</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.08811v1-abstract-short" style="display: inline;"> Preference learning is a central component for aligning current LLMs, but this process can be vulnerable to data poisoning attacks. To address this concern, we introduce PoisonBench, a benchmark for evaluating large language models' susceptibility to data poisoning during preference learning. Data poisoning attacks can manipulate large language model responses to include hidden malicious content o… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08811v1-abstract-full').style.display = 'inline'; document.getElementById('2410.08811v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.08811v1-abstract-full" style="display: none;"> Preference learning is a central component for aligning current LLMs, but this process can be vulnerable to data poisoning attacks. To address this concern, we introduce PoisonBench, a benchmark for evaluating large language models' susceptibility to data poisoning during preference learning. Data poisoning attacks can manipulate large language model responses to include hidden malicious content or biases, potentially causing the model to generate harmful or unintended outputs while appearing to function normally. We deploy two distinct attack types across eight realistic scenarios, assessing 21 widely-used models. Our findings reveal concerning trends: (1) Scaling up parameter size does not inherently enhance resilience against poisoning attacks; (2) There exists a log-linear relationship between the effects of the attack and the data poison ratio; (3) The effect of data poisoning can generalize to extrapolated triggers that are not included in the poisoned data. These results expose weaknesses in current preference learning techniques, highlighting the urgent need for more robust defenses against malicious models and data manipulation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08811v1-abstract-full').style.display = 'none'; document.getElementById('2410.08811v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Tingchen Fu and Fazl Barez are core research contributors</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.11081">arXiv:2408.11081</a> <span> [<a href="https://arxiv.org/pdf/2408.11081">pdf</a>, <a href="https://arxiv.org/format/2408.11081">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> What can Large Language Models Capture about Code Functional Equivalence? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Maveli%2C+N">Nickil Maveli</a>, <a href="/search/cs?searchtype=author&query=Vergari%2C+A">Antonio Vergari</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.11081v1-abstract-short" style="display: inline;"> Code-LLMs, LLMs pre-trained on large code corpora, have shown great progress in learning rich representations of the structure and syntax of code, successfully using it to generate or classify code fragments. At the same time, understanding if they are able to do so because they capture code semantics, and how well, is still an open question. In this paper, we tackle this problem by introducing Se… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.11081v1-abstract-full').style.display = 'inline'; document.getElementById('2408.11081v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.11081v1-abstract-full" style="display: none;"> Code-LLMs, LLMs pre-trained on large code corpora, have shown great progress in learning rich representations of the structure and syntax of code, successfully using it to generate or classify code fragments. At the same time, understanding if they are able to do so because they capture code semantics, and how well, is still an open question. In this paper, we tackle this problem by introducing SeqCoBench, a benchmark for systematically assessing how Code-LLMs can capture code functional equivalence. SeqCoBench contains over 20 code transformations that either preserve or alter the semantics of Python programs. We conduct extensive evaluations in different settings, including zero-shot and parameter-efficient finetuning methods on state-of-the-art (Code-)LLMs to see if they can discern semantically equivalent or different pairs of programs in SeqCoBench. We find that the performance gap between these LLMs and classical match-based retrieval scores is minimal, with both approaches showing a concerning lack of depth in understanding code semantics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.11081v1-abstract-full').style.display = 'none'; document.getElementById('2408.11081v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">37 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.03277">arXiv:2407.03277</a> <span> [<a href="https://arxiv.org/pdf/2407.03277">pdf</a>, <a href="https://arxiv.org/format/2407.03277">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Evaluating Automatic Metrics with Incremental Machine Translation Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+G">Guojun Wu</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a>, <a href="/search/cs?searchtype=author&query=Sennrich%2C+R">Rico Sennrich</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.03277v2-abstract-short" style="display: inline;"> We introduce a dataset comprising commercial machine translations, gathered weekly over six years across 12 translation directions. Since human A/B testing is commonly used, we assume commercial systems improve over time, which enables us to evaluate machine translation (MT) metrics based on their preference for more recent translations. Our study not only confirms several prior findings, such as… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03277v2-abstract-full').style.display = 'inline'; document.getElementById('2407.03277v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.03277v2-abstract-full" style="display: none;"> We introduce a dataset comprising commercial machine translations, gathered weekly over six years across 12 translation directions. Since human A/B testing is commonly used, we assume commercial systems improve over time, which enables us to evaluate machine translation (MT) metrics based on their preference for more recent translations. Our study not only confirms several prior findings, such as the advantage of neural metrics over non-neural ones, but also explores the debated issue of how MT quality affects metric reliability--an investigation that smaller datasets in previous research could not sufficiently explore. Overall, our research demonstrates the dataset's value as a testbed for metric evaluation. We release our code at https://github.com/gjwubyron/Evo <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03277v2-abstract-full').style.display = 'none'; document.getElementById('2407.03277v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.20838">arXiv:2405.20838</a> <span> [<a href="https://arxiv.org/pdf/2405.20838">pdf</a>, <a href="https://arxiv.org/format/2405.20838">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> einspace: Searching for Neural Architectures from Fundamental Operations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ericsson%2C+L">Linus Ericsson</a>, <a href="/search/cs?searchtype=author&query=Espinosa%2C+M">Miguel Espinosa</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+C">Chenhongyi Yang</a>, <a href="/search/cs?searchtype=author&query=Antoniou%2C+A">Antreas Antoniou</a>, <a href="/search/cs?searchtype=author&query=Storkey%2C+A">Amos Storkey</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a>, <a href="/search/cs?searchtype=author&query=McDonagh%2C+S">Steven McDonagh</a>, <a href="/search/cs?searchtype=author&query=Crowley%2C+E+J">Elliot J. Crowley</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.20838v2-abstract-short" style="display: inline;"> Neural architecture search (NAS) finds high performing networks for a given task. Yet the results of NAS are fairly prosaic; they did not e.g. create a shift from convolutional structures to transformers. This is not least because the search spaces in NAS often aren't diverse enough to include such transformations a priori. Instead, for NAS to provide greater potential for fundamental design shift… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.20838v2-abstract-full').style.display = 'inline'; document.getElementById('2405.20838v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.20838v2-abstract-full" style="display: none;"> Neural architecture search (NAS) finds high performing networks for a given task. Yet the results of NAS are fairly prosaic; they did not e.g. create a shift from convolutional structures to transformers. This is not least because the search spaces in NAS often aren't diverse enough to include such transformations a priori. Instead, for NAS to provide greater potential for fundamental design shifts, we need a novel expressive search space design which is built from more fundamental operations. To this end, we introduce einspace, a search space based on a parameterised probabilistic context-free grammar. Our space is versatile, supporting architectures of various sizes and complexities, while also containing diverse network operations which allow it to model convolutions, attention components and more. It contains many existing competitive architectures, and provides flexibility for discovering new ones. Using this search space, we perform experiments to find novel architectures as well as improvements on existing ones on the diverse Unseen NAS datasets. We show that competitive architectures can be obtained by searching from scratch, and we consistently find large improvements when initialising the search with strong baselines. We believe that this work is an important advancement towards a transformative NAS paradigm where search space expressivity and strategic search initialisation play key roles. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.20838v2-abstract-full').style.display = 'none'; document.getElementById('2405.20838v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2024. Project page at https://linusericsson.github.io/einspace/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.09719">arXiv:2405.09719</a> <span> [<a href="https://arxiv.org/pdf/2405.09719">pdf</a>, <a href="https://arxiv.org/format/2405.09719">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Spectral Editing of Activations for Large Language Model Alignment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qiu%2C+Y">Yifu Qiu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Z">Zheng Zhao</a>, <a href="/search/cs?searchtype=author&query=Ziser%2C+Y">Yftah Ziser</a>, <a href="/search/cs?searchtype=author&query=Korhonen%2C+A">Anna Korhonen</a>, <a href="/search/cs?searchtype=author&query=Ponti%2C+E+M">Edoardo M. Ponti</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.09719v3-abstract-short" style="display: inline;"> Large language models (LLMs) often exhibit undesirable behaviours, such as generating untruthful or biased content. Editing their internal representations has been shown to be effective in mitigating such behaviours on top of the existing alignment methods. We propose a novel inference-time editing method, namely spectral editing of activations (SEA), to project the input representations into dire… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.09719v3-abstract-full').style.display = 'inline'; document.getElementById('2405.09719v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.09719v3-abstract-full" style="display: none;"> Large language models (LLMs) often exhibit undesirable behaviours, such as generating untruthful or biased content. Editing their internal representations has been shown to be effective in mitigating such behaviours on top of the existing alignment methods. We propose a novel inference-time editing method, namely spectral editing of activations (SEA), to project the input representations into directions with maximal covariance with the positive demonstrations (e.g., truthful) while minimising covariance with the negative demonstrations (e.g., hallucinated). We also extend our method to non-linear editing using feature functions. We run extensive experiments on benchmarks concerning truthfulness and bias with six open-source LLMs of different sizes and model families. The results demonstrate the superiority of SEA in effectiveness, generalisation to similar tasks, as well as computation and data efficiency. We also show that SEA editing only has a limited negative impact on other model capabilities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.09719v3-abstract-full').style.display = 'none'; document.getElementById('2405.09719v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">24 pages, NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.13312">arXiv:2403.13312</a> <span> [<a href="https://arxiv.org/pdf/2403.13312">pdf</a>, <a href="https://arxiv.org/format/2403.13312">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> LeanReasoner: Boosting Complex Logical Reasoning with Lean </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+D">Dongwei Jiang</a>, <a href="/search/cs?searchtype=author&query=Fonseca%2C+M">Marcio Fonseca</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.13312v1-abstract-short" style="display: inline;"> Large language models (LLMs) often struggle with complex logical reasoning due to logical inconsistencies and the inherent difficulty of such reasoning. We use Lean, a theorem proving framework, to address these challenges. By formalizing logical reasoning problems into theorems within Lean, we can solve them by proving or disproving the corresponding theorems. This method reduces the risk of logi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.13312v1-abstract-full').style.display = 'inline'; document.getElementById('2403.13312v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.13312v1-abstract-full" style="display: none;"> Large language models (LLMs) often struggle with complex logical reasoning due to logical inconsistencies and the inherent difficulty of such reasoning. We use Lean, a theorem proving framework, to address these challenges. By formalizing logical reasoning problems into theorems within Lean, we can solve them by proving or disproving the corresponding theorems. This method reduces the risk of logical inconsistencies with the help of Lean's symbolic solver. It also enhances our ability to treat complex reasoning tasks by using Lean's extensive library of theorem proofs. Our method achieves state-of-the-art performance on the FOLIO dataset and achieves performance near this level on ProofWriter. Notably, these results were accomplished by fine-tuning on fewer than 100 in-domain samples for each dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.13312v1-abstract-full').style.display = 'none'; document.getElementById('2403.13312v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to NAACL 2024 main conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.08828">arXiv:2403.08828</a> <span> [<a href="https://arxiv.org/pdf/2403.08828">pdf</a>, <a href="https://arxiv.org/format/2403.08828">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> People Attribute Purpose to Autonomous Vehicles When Explaining Their Behavior </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gyevnar%2C+B">Balint Gyevnar</a>, <a href="/search/cs?searchtype=author&query=Droop%2C+S">Stephanie Droop</a>, <a href="/search/cs?searchtype=author&query=Quillien%2C+T">Tadeg Quillien</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a>, <a href="/search/cs?searchtype=author&query=Bramley%2C+N+R">Neil R. Bramley</a>, <a href="/search/cs?searchtype=author&query=Lucas%2C+C+G">Christopher G. Lucas</a>, <a href="/search/cs?searchtype=author&query=Albrecht%2C+S+V">Stefano V. Albrecht</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.08828v2-abstract-short" style="display: inline;"> Cognitive science can help us understand which explanations people might expect, and in which format they frame these explanations, whether causal, counterfactual, or teleological (i.e., purpose-oriented). Understanding the relevance of these concepts is crucial for building good explainable AI (XAI) which offers recourse and actionability. Focusing on autonomous driving, a complex decision-making… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.08828v2-abstract-full').style.display = 'inline'; document.getElementById('2403.08828v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.08828v2-abstract-full" style="display: none;"> Cognitive science can help us understand which explanations people might expect, and in which format they frame these explanations, whether causal, counterfactual, or teleological (i.e., purpose-oriented). Understanding the relevance of these concepts is crucial for building good explainable AI (XAI) which offers recourse and actionability. Focusing on autonomous driving, a complex decision-making domain, we report empirical data from two surveys on (i) how people explain the behavior of autonomous vehicles in 14 unique scenarios (N1=54), and (ii) how they perceive these explanations in terms of complexity, quality, and trustworthiness (N2=356). Participants deemed teleological explanations significantly better quality than counterfactual ones, with perceived teleology being the best predictor of perceived quality and trustworthiness. Neither the perceived teleology nor the quality were affected by whether the car was an autonomous vehicle or driven by a person. This indicates that people use teleology to evaluate information about not just other people but also autonomous vehicles. Taken together, our findings highlight the importance of explanations that are framed in terms of purpose rather than just, as is standard in XAI, the causal mechanisms involved. We release the 14 scenarios and more than 1,300 elicited explanations publicly as the Human Explanations for Autonomous Driving Decisions (HEADD) dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.08828v2-abstract-full').style.display = 'none'; document.getElementById('2403.08828v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.15055">arXiv:2402.15055</a> <span> [<a href="https://arxiv.org/pdf/2402.15055">pdf</a>, <a href="https://arxiv.org/format/2402.15055">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Interpreting Context Look-ups in Transformers: Investigating Attention-MLP Interactions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Neo%2C+C">Clement Neo</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a>, <a href="/search/cs?searchtype=author&query=Barez%2C+F">Fazl Barez</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.15055v2-abstract-short" style="display: inline;"> Understanding the inner workings of large language models (LLMs) is crucial for advancing their theoretical foundations and real-world applications. While the attention mechanism and multi-layer perceptrons (MLPs) have been studied independently, their interactions remain largely unexplored. This study investigates how attention heads and next-token neurons interact in LLMs to predict new words. W… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.15055v2-abstract-full').style.display = 'inline'; document.getElementById('2402.15055v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.15055v2-abstract-full" style="display: none;"> Understanding the inner workings of large language models (LLMs) is crucial for advancing their theoretical foundations and real-world applications. While the attention mechanism and multi-layer perceptrons (MLPs) have been studied independently, their interactions remain largely unexplored. This study investigates how attention heads and next-token neurons interact in LLMs to predict new words. We propose a methodology to identify next-token neurons, find prompts that highly activate them, and determine the upstream attention heads responsible. We then generate and evaluate explanations for the activity of these attention heads in an automated manner. Our findings reveal that some attention heads recognize specific contexts relevant to predicting a token and activate a downstream token-predicting neuron accordingly. This mechanism provides a deeper understanding of how attention heads work with MLP neurons to perform next-token prediction. Our approach offers a foundation for further research into the intricate workings of LLMs and their impact on text generation and understanding. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.15055v2-abstract-full').style.display = 'none'; document.getElementById('2402.15055v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to EMNLP 2024 Main Conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.10643">arXiv:2402.10643</a> <span> [<a href="https://arxiv.org/pdf/2402.10643">pdf</a>, <a href="https://arxiv.org/format/2402.10643">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> `Keep it Together': Enforcing Cohesion in Extractive Summaries by Simulating Human Memory </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cardenas%2C+R">Ronald Cardenas</a>, <a href="/search/cs?searchtype=author&query=Galle%2C+M">Matthias Galle</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.10643v1-abstract-short" style="display: inline;"> Extractive summaries are usually presented as lists of sentences with no expected cohesion between them. In this paper, we aim to enforce cohesion whilst controlling for informativeness and redundancy in summaries, in cases where the input exhibits high redundancy. The pipeline controls for redundancy in long inputs as it is consumed, and balances informativeness and cohesion during sentence selec… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.10643v1-abstract-full').style.display = 'inline'; document.getElementById('2402.10643v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.10643v1-abstract-full" style="display: none;"> Extractive summaries are usually presented as lists of sentences with no expected cohesion between them. In this paper, we aim to enforce cohesion whilst controlling for informativeness and redundancy in summaries, in cases where the input exhibits high redundancy. The pipeline controls for redundancy in long inputs as it is consumed, and balances informativeness and cohesion during sentence selection. Our sentence selector simulates human memory to keep track of topics --modeled as lexical chains--, enforcing cohesive ties between noun phrases. Across a variety of domains, our experiments revealed that it is possible to extract highly cohesive summaries that nevertheless read as informative to humans as summaries extracted by only accounting for informativeness or redundancy. The extracted summaries exhibit smooth topic transitions between sentences as signaled by lexical chains, with chains spanning adjacent or near-adjacent sentences. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.10643v1-abstract-full').style.display = 'none'; document.getElementById('2402.10643v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.10415">arXiv:2401.10415</a> <span> [<a href="https://arxiv.org/pdf/2401.10415">pdf</a>, <a href="https://arxiv.org/format/2401.10415">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Can Large Language Model Summarizers Adapt to Diverse Scientific Communication Goals? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fonseca%2C+M">Marcio Fonseca</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.10415v2-abstract-short" style="display: inline;"> In this work, we investigate the controllability of large language models (LLMs) on scientific summarization tasks. We identify key stylistic and content coverage factors that characterize different types of summaries such as paper reviews, abstracts, and lay summaries. By controlling stylistic features, we find that non-fine-tuned LLMs outperform humans in the MuP review generation task, both in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.10415v2-abstract-full').style.display = 'inline'; document.getElementById('2401.10415v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.10415v2-abstract-full" style="display: none;"> In this work, we investigate the controllability of large language models (LLMs) on scientific summarization tasks. We identify key stylistic and content coverage factors that characterize different types of summaries such as paper reviews, abstracts, and lay summaries. By controlling stylistic features, we find that non-fine-tuned LLMs outperform humans in the MuP review generation task, both in terms of similarity to reference summaries and human preferences. Also, we show that we can improve the controllability of LLMs with keyword-based classifier-free guidance (CFG) while achieving lexical overlap comparable to strong fine-tuned baselines on arXiv and PubMed. However, our results also indicate that LLMs cannot consistently generate long summaries with more than 8 sentences. Furthermore, these models exhibit limited capacity to produce highly abstractive lay summaries. Although LLMs demonstrate strong generic summarization competency, sophisticated content control without costly fine-tuning remains an open problem for domain-specific applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.10415v2-abstract-full').style.display = 'none'; document.getElementById('2401.10415v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ACL 2024 camera ready</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.01814">arXiv:2401.01814</a> <span> [<a href="https://arxiv.org/pdf/2401.01814">pdf</a>, <a href="https://arxiv.org/format/2401.01814">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Large Language Models Relearn Removed Concepts </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lo%2C+M">Michelle Lo</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a>, <a href="/search/cs?searchtype=author&query=Barez%2C+F">Fazl Barez</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.01814v1-abstract-short" style="display: inline;"> Advances in model editing through neuron pruning hold promise for removing undesirable concepts from large language models. However, it remains unclear whether models have the capacity to reacquire pruned concepts after editing. To investigate this, we evaluate concept relearning in models by tracking concept saliency and similarity in pruned neurons during retraining. Our findings reveal that mod… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.01814v1-abstract-full').style.display = 'inline'; document.getElementById('2401.01814v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.01814v1-abstract-full" style="display: none;"> Advances in model editing through neuron pruning hold promise for removing undesirable concepts from large language models. However, it remains unclear whether models have the capacity to reacquire pruned concepts after editing. To investigate this, we evaluate concept relearning in models by tracking concept saliency and similarity in pruned neurons during retraining. Our findings reveal that models can quickly regain performance post-pruning by relocating advanced concepts to earlier layers and reallocating pruned concepts to primed neurons with similar semantics. This demonstrates that models exhibit polysemantic capacities and can blend old and new concepts in individual neurons. While neuron pruning provides interpretability into model concepts, our results highlight the challenges of permanent concept removal for improved model \textit{safety}. Monitoring concept reemergence and developing techniques to mitigate relearning of unsafe concepts will be important directions for more robust model editing. Overall, our work strongly demonstrates the resilience and fluidity of concept representations in LLMs post concept removal. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.01814v1-abstract-full').style.display = 'none'; document.getElementById('2401.01814v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.03480">arXiv:2312.03480</a> <span> [<a href="https://arxiv.org/pdf/2312.03480">pdf</a>, <a href="https://arxiv.org/format/2312.03480">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> AMR Parsing is Far from Solved: GrAPES, the Granular AMR Parsing Evaluation Suite </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Groschwitz%2C+J">Jonas Groschwitz</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a>, <a href="/search/cs?searchtype=author&query=Donatelli%2C+L">Lucia Donatelli</a>, <a href="/search/cs?searchtype=author&query=Fowlie%2C+M">Meaghan Fowlie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.03480v1-abstract-short" style="display: inline;"> We present the Granular AMR Parsing Evaluation Suite (GrAPES), a challenge set for Abstract Meaning Representation (AMR) parsing with accompanying evaluation metrics. AMR parsers now obtain high scores on the standard AMR evaluation metric Smatch, close to or even above reported inter-annotator agreement. But that does not mean that AMR parsing is solved; in fact, human evaluation in previous work… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.03480v1-abstract-full').style.display = 'inline'; document.getElementById('2312.03480v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.03480v1-abstract-full" style="display: none;"> We present the Granular AMR Parsing Evaluation Suite (GrAPES), a challenge set for Abstract Meaning Representation (AMR) parsing with accompanying evaluation metrics. AMR parsers now obtain high scores on the standard AMR evaluation metric Smatch, close to or even above reported inter-annotator agreement. But that does not mean that AMR parsing is solved; in fact, human evaluation in previous work indicates that current parsers still quite frequently make errors on node labels or graph structure that substantially distort sentence meaning. Here, we provide an evaluation suite that tests AMR parsers on a range of phenomena of practical, technical, and linguistic interest. Our 36 categories range from seen and unseen labels, to structural generalization, to coreference. GrAPES reveals in depth the abilities and shortcomings of current AMR parsers. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.03480v1-abstract-full').style.display = 'none'; document.getElementById('2312.03480v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at EMNLP 2023. For the associated GitHub repository, see https://github.com/jgroschwitz/GrAPES</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> J.5 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.09467">arXiv:2311.09467</a> <span> [<a href="https://arxiv.org/pdf/2311.09467">pdf</a>, <a href="https://arxiv.org/format/2311.09467">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Think While You Write: Hypothesis Verification Promotes Faithful Knowledge-to-Text Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qiu%2C+Y">Yifu Qiu</a>, <a href="/search/cs?searchtype=author&query=Embar%2C+V">Varun Embar</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a>, <a href="/search/cs?searchtype=author&query=Han%2C+B">Benjamin Han</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.09467v2-abstract-short" style="display: inline;"> Knowledge-to-text generators often struggle to faithfully generate descriptions for the input facts: they may produce hallucinations that contradict the input, or describe facts not present in the input. To reduce hallucinations, we propose a decoding-only method, TWEAK (Think While Effectively Articulating Knowledge), which can be integrated with any generator without retraining. TWEAK treats the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.09467v2-abstract-full').style.display = 'inline'; document.getElementById('2311.09467v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.09467v2-abstract-full" style="display: none;"> Knowledge-to-text generators often struggle to faithfully generate descriptions for the input facts: they may produce hallucinations that contradict the input, or describe facts not present in the input. To reduce hallucinations, we propose a decoding-only method, TWEAK (Think While Effectively Articulating Knowledge), which can be integrated with any generator without retraining. TWEAK treats the generated sequences at each decoding step and its future sequences as hypotheses, and ranks each generation candidate based on the extent to which their hypotheses are supported by the input facts using a Hypothesis Verification Model (HVM). We first demonstrate the effectiveness of TWEAK by using a Natural Language Inference (NLI) model as the HVM and report improved faithfulness with a minimal impact on the quality. We then replace the NLI model with a task-specific HVM trained with a first-of-a-kind dataset, FATE (Fact-Aligned Textual Entailment), which pairs input facts with their original and perturbed descriptions. We test TWEAK with two generators, and the best TWEAK variants improve on average for the two models by 2.24/7.17 points in faithfulness (FactKB) in in/out-of-distribution evaluations, respectively, and with only a 0.14/0.32-point decline in quality (BERTScore). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.09467v2-abstract-full').style.display = 'none'; document.getElementById('2311.09467v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NAACL 2024 (Findings)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.08704">arXiv:2311.08704</a> <span> [<a href="https://arxiv.org/pdf/2311.08704">pdf</a>, <a href="https://arxiv.org/format/2311.08704">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Can Large Language Models Follow Concept Annotation Guidelines? A Case Study on Scientific and Financial Domains </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fonseca%2C+M">Marcio Fonseca</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.08704v2-abstract-short" style="display: inline;"> Although large language models (LLMs) exhibit remarkable capacity to leverage in-context demonstrations, it is still unclear to what extent they can learn new concepts or facts from ground-truth labels. To address this question, we examine the capacity of instruction-tuned LLMs to follow in-context concept guidelines for sentence labeling tasks. We design guidelines that present different types of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.08704v2-abstract-full').style.display = 'inline'; document.getElementById('2311.08704v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.08704v2-abstract-full" style="display: none;"> Although large language models (LLMs) exhibit remarkable capacity to leverage in-context demonstrations, it is still unclear to what extent they can learn new concepts or facts from ground-truth labels. To address this question, we examine the capacity of instruction-tuned LLMs to follow in-context concept guidelines for sentence labeling tasks. We design guidelines that present different types of factual and counterfactual concept definitions, which are used as prompts for zero-shot sentence classification tasks. Our results show that although concept definitions consistently help in task performance, only the larger models (with 70B parameters or more) have limited ability to work under counterfactual contexts. Importantly, only proprietary models such as GPT-3.5 and GPT-4 can recognize nonsensical guidelines, which we hypothesize is due to more sophisticated alignment methods. Finally, we find that Falcon-180B-chat is outperformed by Llama-2-70B-chat is most cases, which indicates that careful fine-tuning is more effective than increasing model scale. Altogether, our simple evaluation method reveals significant gaps in concept understanding between the most capable open-source language models and the leading proprietary APIs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.08704v2-abstract-full').style.display = 'none'; document.getElementById('2311.08704v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ACL 2024 camera ready</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.08398">arXiv:2311.08398</a> <span> [<a href="https://arxiv.org/pdf/2311.08398">pdf</a>, <a href="https://arxiv.org/format/2311.08398">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Are Large Language Models Temporally Grounded? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qiu%2C+Y">Yifu Qiu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Z">Zheng Zhao</a>, <a href="/search/cs?searchtype=author&query=Ziser%2C+Y">Yftah Ziser</a>, <a href="/search/cs?searchtype=author&query=Korhonen%2C+A">Anna Korhonen</a>, <a href="/search/cs?searchtype=author&query=Ponti%2C+E+M">Edoardo M. Ponti</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.08398v2-abstract-short" style="display: inline;"> Are Large language models (LLMs) temporally grounded? Since LLMs cannot perceive and interact with the environment, it is impossible to answer this question directly. Instead, we provide LLMs with textual narratives and probe them with respect to their common-sense knowledge of the structure and duration of events, their ability to order events along a timeline, and self-consistency within their t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.08398v2-abstract-full').style.display = 'inline'; document.getElementById('2311.08398v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.08398v2-abstract-full" style="display: none;"> Are Large language models (LLMs) temporally grounded? Since LLMs cannot perceive and interact with the environment, it is impossible to answer this question directly. Instead, we provide LLMs with textual narratives and probe them with respect to their common-sense knowledge of the structure and duration of events, their ability to order events along a timeline, and self-consistency within their temporal model (e.g., temporal relations such as after and before are mutually exclusive for any pair of events). We evaluate state-of-the-art LLMs (such as LLaMA 2 and GPT-4) on three tasks reflecting these abilities. Generally, we find that LLMs lag significantly behind both human performance as well as small-scale, specialised LMs. In-context learning, instruction tuning, and chain-of-thought prompting reduce this gap only to a limited degree. Crucially, LLMs struggle the most with self-consistency, displaying incoherent behaviour in at least 27.23% of their predictions. Contrary to expectations, we also find that scaling the model size does not guarantee positive gains in performance. To explain these results, we study the sources from which LLMs may gather temporal information: we find that sentence ordering in unlabelled texts, available during pre-training, is only weakly correlated with event ordering. Moreover, public instruction tuning mixtures contain few temporal tasks. Hence, we conclude that current LLMs lack a consistent temporal model of textual narratives. Code, datasets, and LLM outputs are available at https://github.com/yfqiu-nlp/temporal-llms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.08398v2-abstract-full').style.display = 'none'; document.getElementById('2311.08398v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.15513">arXiv:2310.15513</a> <span> [<a href="https://arxiv.org/pdf/2310.15513">pdf</a>, <a href="https://arxiv.org/format/2310.15513">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> A Joint Matrix Factorization Analysis of Multilingual Representations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+Z">Zheng Zhao</a>, <a href="/search/cs?searchtype=author&query=Ziser%2C+Y">Yftah Ziser</a>, <a href="/search/cs?searchtype=author&query=Webber%2C+B">Bonnie Webber</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.15513v1-abstract-short" style="display: inline;"> We present an analysis tool based on joint matrix factorization for comparing latent representations of multilingual and monolingual models. An alternative to probing, this tool allows us to analyze multiple sets of representations in a joint manner. Using this tool, we study to what extent and how morphosyntactic features are reflected in the representations learned by multilingual pre-trained mo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.15513v1-abstract-full').style.display = 'inline'; document.getElementById('2310.15513v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.15513v1-abstract-full" style="display: none;"> We present an analysis tool based on joint matrix factorization for comparing latent representations of multilingual and monolingual models. An alternative to probing, this tool allows us to analyze multiple sets of representations in a joint manner. Using this tool, we study to what extent and how morphosyntactic features are reflected in the representations learned by multilingual pre-trained models. We conduct a large-scale empirical study of over 33 languages and 17 morphosyntactic categories. Our findings demonstrate variations in the encoding of morphosyntactic information across upper and lower layers, with category-specific differences influenced by language properties. Hierarchical clustering of the factorization outputs yields a tree structure that is related to phylogenetic trees manually crafted by linguists. Moreover, we find the factorization outputs exhibit strong associations with performance observed across different cross-lingual tasks. We release our code to facilitate future research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.15513v1-abstract-full').style.display = 'none'; document.getElementById('2310.15513v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to Findings of EMNLP 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.19734">arXiv:2305.19734</a> <span> [<a href="https://arxiv.org/pdf/2305.19734">pdf</a>, <a href="https://arxiv.org/format/2305.19734">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> </div> </div> <p class="title is-5 mathjax"> Knowledge Base Question Answering for Space Debris Queries </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Darm%2C+P">Paul Darm</a>, <a href="/search/cs?searchtype=author&query=Miceli-Barone%2C+A+V">Antonio Valerio Miceli-Barone</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a>, <a href="/search/cs?searchtype=author&query=Riccardi%2C+A">Annalisa Riccardi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.19734v1-abstract-short" style="display: inline;"> Space agencies execute complex satellite operations that need to be supported by the technical knowledge contained in their extensive information systems. Knowledge bases (KB) are an effective way of storing and accessing such information at scale. In this work we present a system, developed for the European Space Agency (ESA), that can answer complex natural language queries, to support engineers… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.19734v1-abstract-full').style.display = 'inline'; document.getElementById('2305.19734v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.19734v1-abstract-full" style="display: none;"> Space agencies execute complex satellite operations that need to be supported by the technical knowledge contained in their extensive information systems. Knowledge bases (KB) are an effective way of storing and accessing such information at scale. In this work we present a system, developed for the European Space Agency (ESA), that can answer complex natural language queries, to support engineers in accessing the information contained in a KB that models the orbital space debris environment. Our system is based on a pipeline which first generates a sequence of basic database operations, called a %program sketch, from a natural language question, then specializes the sketch into a concrete query program with mentions of entities, attributes and relations, and finally executes the program against the database. This pipeline decomposition approach enables us to train the system by leveraging out-of-domain data and semi-synthetic data generated by GPT-3, thus reducing overfitting and shortcut learning even with limited amount of in-domain training data. Our code can be found at \url{https://github.com/PaulDrm/DISCOSQA}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.19734v1-abstract-full').style.display = 'none'; document.getElementById('2305.19734v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, ACL 2023 industry track</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.7 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.16947">arXiv:2305.16947</a> <span> [<a href="https://arxiv.org/pdf/2305.16947">pdf</a>, <a href="https://arxiv.org/format/2305.16947">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Sentence-Incremental Neural Coreference Resolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Grenander%2C+M">Matt Grenander</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a>, <a href="/search/cs?searchtype=author&query=Steedman%2C+M">Mark Steedman</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.16947v1-abstract-short" style="display: inline;"> We propose a sentence-incremental neural coreference resolution system which incrementally builds clusters after marking mention boundaries in a shift-reduce method. The system is aimed at bridging two recent approaches at coreference resolution: (1) state-of-the-art non-incremental models that incur quadratic complexity in document length with high computational cost, and (2) memory network-based… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.16947v1-abstract-full').style.display = 'inline'; document.getElementById('2305.16947v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.16947v1-abstract-full" style="display: none;"> We propose a sentence-incremental neural coreference resolution system which incrementally builds clusters after marking mention boundaries in a shift-reduce method. The system is aimed at bridging two recent approaches at coreference resolution: (1) state-of-the-art non-incremental models that incur quadratic complexity in document length with high computational cost, and (2) memory network-based models which operate incrementally but do not generalize beyond pronouns. For comparison, we simulate an incremental setting by constraining non-incremental systems to form partial coreference chains before observing new sentences. In this setting, our system outperforms comparable state-of-the-art methods by 2 F1 on OntoNotes and 7 F1 on the CODI-CRAC 2021 corpus. In a conventional coreference setup, our system achieves 76.3 F1 on OntoNotes and 45.8 F1 on CODI-CRAC 2021, which is comparable to state-of-the-art baselines. We also analyze variations of our system and show that the degree of incrementality in the encoder has a surprisingly large effect on the resulting performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.16947v1-abstract-full').style.display = 'none'; document.getElementById('2305.16947v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at EMNLP 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.15507">arXiv:2305.15507</a> <span> [<a href="https://arxiv.org/pdf/2305.15507">pdf</a>, <a href="https://arxiv.org/format/2305.15507">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> The Larger They Are, the Harder They Fail: Language Models do not Recognize Identifier Swaps in Python </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Miceli-Barone%2C+A+V">Antonio Valerio Miceli-Barone</a>, <a href="/search/cs?searchtype=author&query=Barez%2C+F">Fazl Barez</a>, <a href="/search/cs?searchtype=author&query=Konstas%2C+I">Ioannis Konstas</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.15507v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) have successfully been applied to code generation tasks, raising the question of how well these models understand programming. Typical programming languages have invariances and equivariances in their semantics that human programmers intuitively understand and exploit, such as the (near) invariance to the renaming of identifiers. We show that LLMs not only fail to prop… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.15507v1-abstract-full').style.display = 'inline'; document.getElementById('2305.15507v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.15507v1-abstract-full" style="display: none;"> Large Language Models (LLMs) have successfully been applied to code generation tasks, raising the question of how well these models understand programming. Typical programming languages have invariances and equivariances in their semantics that human programmers intuitively understand and exploit, such as the (near) invariance to the renaming of identifiers. We show that LLMs not only fail to properly generate correct Python code when default function names are swapped, but some of them even become more confident in their incorrect predictions as the model size increases, an instance of the recently discovered phenomenon of Inverse Scaling, which runs contrary to the commonly observed trend of increasing prediction quality with increasing model size. Our findings indicate that, despite their astonishing typical-case performance, LLMs still lack a deep, abstract understanding of the content they manipulate, making them unsuitable for tasks that statistically deviate from their training data, and that mere scaling is not enough to achieve such capability. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.15507v1-abstract-full').style.display = 'none'; document.getElementById('2305.15507v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 5 figure, ACL 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.13632">arXiv:2305.13632</a> <span> [<a href="https://arxiv.org/pdf/2305.13632">pdf</a>, <a href="https://arxiv.org/format/2305.13632">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Detecting and Mitigating Hallucinations in Multilingual Summarisation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qiu%2C+Y">Yifu Qiu</a>, <a href="/search/cs?searchtype=author&query=Ziser%2C+Y">Yftah Ziser</a>, <a href="/search/cs?searchtype=author&query=Korhonen%2C+A">Anna Korhonen</a>, <a href="/search/cs?searchtype=author&query=Ponti%2C+E+M">Edoardo M. Ponti</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.13632v2-abstract-short" style="display: inline;"> Hallucinations pose a significant challenge to the reliability of neural models for abstractive summarisation. While automatically generated summaries may be fluent, they often lack faithfulness to the original document. This issue becomes even more pronounced in low-resource settings, such as cross-lingual transfer. With the existing faithful metrics focusing on English, even measuring the extent… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.13632v2-abstract-full').style.display = 'inline'; document.getElementById('2305.13632v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.13632v2-abstract-full" style="display: none;"> Hallucinations pose a significant challenge to the reliability of neural models for abstractive summarisation. While automatically generated summaries may be fluent, they often lack faithfulness to the original document. This issue becomes even more pronounced in low-resource settings, such as cross-lingual transfer. With the existing faithful metrics focusing on English, even measuring the extent of this phenomenon in cross-lingual settings is hard. To address this, we first develop a novel metric, mFACT, evaluating the faithfulness of non-English summaries, leveraging translation-based transfer from multiple English faithfulness metrics. We then propose a simple but effective method to reduce hallucinations with a cross-lingual transfer, which weighs the loss of each training example by its faithfulness score. Through extensive experiments in multiple languages, we demonstrate that mFACT is the metric that is most suited to detect hallucinations. Moreover, we find that our proposed loss weighting method drastically increases both performance and faithfulness according to both automatic and human evaluation when compared to strong baselines for cross-lingual transfer such as MAD-X. Our code and dataset are available at https://github.com/yfqiu-nlp/mfact-summ. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.13632v2-abstract-full').style.display = 'none'; document.getElementById('2305.13632v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.08828">arXiv:2305.08828</a> <span> [<a href="https://arxiv.org/pdf/2305.08828">pdf</a>, <a href="https://arxiv.org/format/2305.08828">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> PMIndiaSum: Multilingual and Cross-lingual Headline Summarization for Languages in India </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Urlana%2C+A">Ashok Urlana</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+P">Pinzhen Chen</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Z">Zheng Zhao</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a>, <a href="/search/cs?searchtype=author&query=Shrivastava%2C+M">Manish Shrivastava</a>, <a href="/search/cs?searchtype=author&query=Haddow%2C+B">Barry Haddow</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.08828v2-abstract-short" style="display: inline;"> This paper introduces PMIndiaSum, a multilingual and massively parallel summarization corpus focused on languages in India. Our corpus provides a training and testing ground for four language families, 14 languages, and the largest to date with 196 language pairs. We detail our construction workflow including data acquisition, processing, and quality assurance. Furthermore, we publish benchmarks f… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.08828v2-abstract-full').style.display = 'inline'; document.getElementById('2305.08828v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.08828v2-abstract-full" style="display: none;"> This paper introduces PMIndiaSum, a multilingual and massively parallel summarization corpus focused on languages in India. Our corpus provides a training and testing ground for four language families, 14 languages, and the largest to date with 196 language pairs. We detail our construction workflow including data acquisition, processing, and quality assurance. Furthermore, we publish benchmarks for monolingual, cross-lingual, and multilingual summarization by fine-tuning, prompting, as well as translate-and-summarize. Experimental results confirm the crucial role of our data in aiding summarization between Indian languages. Our dataset is publicly available and can be freely modified and re-distributed. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.08828v2-abstract-full').style.display = 'none'; document.getElementById('2305.08828v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Findings of EMNLP 2023</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.7 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.10809">arXiv:2302.10809</a> <span> [<a href="https://arxiv.org/pdf/2302.10809">pdf</a>, <a href="https://arxiv.org/format/2302.10809">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Causal Explanations for Sequential Decision-Making in Multi-Agent Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gyevnar%2C+B">Balint Gyevnar</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Cheng Wang</a>, <a href="/search/cs?searchtype=author&query=Lucas%2C+C+G">Christopher G. Lucas</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a>, <a href="/search/cs?searchtype=author&query=Albrecht%2C+S+V">Stefano V. Albrecht</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.10809v4-abstract-short" style="display: inline;"> We present CEMA: Causal Explanations in Multi-Agent systems; a framework for creating causal natural language explanations of an agent's decisions in dynamic sequential multi-agent systems to build more trustworthy autonomous agents. Unlike prior work that assumes a fixed causal structure, CEMA only requires a probabilistic model for forward-simulating the state of the system. Using such a model,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.10809v4-abstract-full').style.display = 'inline'; document.getElementById('2302.10809v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.10809v4-abstract-full" style="display: none;"> We present CEMA: Causal Explanations in Multi-Agent systems; a framework for creating causal natural language explanations of an agent's decisions in dynamic sequential multi-agent systems to build more trustworthy autonomous agents. Unlike prior work that assumes a fixed causal structure, CEMA only requires a probabilistic model for forward-simulating the state of the system. Using such a model, CEMA simulates counterfactual worlds that identify the salient causes behind the agent's decisions. We evaluate CEMA on the task of motion planning for autonomous driving and test it in diverse simulated scenarios. We show that CEMA correctly and robustly identifies the causes behind the agent's decisions, even when a large number of other agents is present, and show via a user study that CEMA's explanations have a positive effect on participants' trust in autonomous vehicles and are rated as high as high-quality baseline explanations elicited from other participants. We release the collected explanations with annotations as the HEADD dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.10809v4-abstract-full').style.display = 'none'; document.getElementById('2302.10809v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in 23rd International Conference on Autonomous Agents and Multi-Agent Systems (AAMAS), 2024</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.9 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.09350">arXiv:2302.09350</a> <span> [<a href="https://arxiv.org/pdf/2302.09350">pdf</a>, <a href="https://arxiv.org/format/2302.09350">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> BERT is not The Count: Learning to Match Mathematical Statements with Proofs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+W+W">Weixian Waylon Li</a>, <a href="/search/cs?searchtype=author&query=Ziser%2C+Y">Yftah Ziser</a>, <a href="/search/cs?searchtype=author&query=Coavoux%2C+M">Maximin Coavoux</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.09350v1-abstract-short" style="display: inline;"> We introduce a task consisting in matching a proof to a given mathematical statement. The task fits well within current research on Mathematical Information Retrieval and, more generally, mathematical article analysis (Mathematical Sciences, 2014). We present a dataset for the task (the MATcH dataset) consisting of over 180k statement-proof pairs extracted from modern mathematical research article… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.09350v1-abstract-full').style.display = 'inline'; document.getElementById('2302.09350v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.09350v1-abstract-full" style="display: none;"> We introduce a task consisting in matching a proof to a given mathematical statement. The task fits well within current research on Mathematical Information Retrieval and, more generally, mathematical article analysis (Mathematical Sciences, 2014). We present a dataset for the task (the MATcH dataset) consisting of over 180k statement-proof pairs extracted from modern mathematical research articles. We find this dataset highly representative of our task, as it consists of relatively new findings useful to mathematicians. We propose a bilinear similarity model and two decoding methods to match statements to proofs effectively. While the first decoding method matches a proof to a statement without being aware of other statements or proofs, the second method treats the task as a global matching problem. Through a symbol replacement procedure, we analyze the "insights" that pre-trained language models have in such mathematical article analysis and show that while these models perform well on this task with the best performing mean reciprocal rank of 73.7, they follow a relatively shallow symbolic analysis and matching to achieve that performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.09350v1-abstract-full').style.display = 'none'; document.getElementById('2302.09350v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to the Conference of the European Chapter of the Association for Computational Linguistics (EACL), 2023; 14 pages. arXiv admin note: substantial text overlap with arXiv:2102.02110</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.09458">arXiv:2211.09458</a> <span> [<a href="https://arxiv.org/pdf/2211.09458">pdf</a>, <a href="https://arxiv.org/format/2211.09458">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Abstractive Summarization Guided by Latent Hierarchical Document Structure </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qiu%2C+Y">Yifu Qiu</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.09458v1-abstract-short" style="display: inline;"> Sequential abstractive neural summarizers often do not use the underlying structure in the input article or dependencies between the input sentences. This structure is essential to integrate and consolidate information from different parts of the text. To address this shortcoming, we propose a hierarchy-aware graph neural network (HierGNN) which captures such dependencies through three main steps:… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.09458v1-abstract-full').style.display = 'inline'; document.getElementById('2211.09458v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.09458v1-abstract-full" style="display: none;"> Sequential abstractive neural summarizers often do not use the underlying structure in the input article or dependencies between the input sentences. This structure is essential to integrate and consolidate information from different parts of the text. To address this shortcoming, we propose a hierarchy-aware graph neural network (HierGNN) which captures such dependencies through three main steps: 1) learning a hierarchical document structure through a latent structure tree learned by a sparse matrix-tree computation; 2) propagating sentence information over this structure using a novel message-passing node propagation mechanism to identify salient information; 3) using graph-level attention to concentrate the decoder on salient information. Experiments confirm HierGNN improves strong sequence models such as BART, with a 0.55 and 0.75 margin in average ROUGE-1/2/L for CNN/DM and XSum. Further human evaluation demonstrates that summaries produced by our model are more relevant and less redundant than the baselines, into which HierGNN is incorporated. We also find HierGNN synthesizes summaries by fusing multiple source sentences more, rather than compressing a single source sentence, and that it processes long inputs more effectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.09458v1-abstract-full').style.display = 'none'; document.getElementById('2211.09458v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP 2022, 15 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.12553">arXiv:2210.12553</a> <span> [<a href="https://arxiv.org/pdf/2210.12553">pdf</a>, <a href="https://arxiv.org/format/2210.12553">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Understanding Domain Learning in Language Models Through Subpopulation Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+Z">Zheng Zhao</a>, <a href="/search/cs?searchtype=author&query=Ziser%2C+Y">Yftah Ziser</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.12553v1-abstract-short" style="display: inline;"> We investigate how different domains are encoded in modern neural network architectures. We analyze the relationship between natural language domains, model size, and the amount of training data used. The primary analysis tool we develop is based on subpopulation analysis with Singular Vector Canonical Correlation Analysis (SVCCA), which we apply to Transformer-based language models (LMs). We comp… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.12553v1-abstract-full').style.display = 'inline'; document.getElementById('2210.12553v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.12553v1-abstract-full" style="display: none;"> We investigate how different domains are encoded in modern neural network architectures. We analyze the relationship between natural language domains, model size, and the amount of training data used. The primary analysis tool we develop is based on subpopulation analysis with Singular Vector Canonical Correlation Analysis (SVCCA), which we apply to Transformer-based language models (LMs). We compare the latent representations of such a language model at its different layers from a pair of models: a model trained on multiple domains (an experimental model) and a model trained on a single domain (a control model). Through our method, we find that increasing the model capacity impacts how domain information is stored in upper and lower layers differently. In addition, we show that larger experimental models simultaneously embed domain-specific information as if they were conjoined control models. These findings are confirmed qualitatively, demonstrating the validity of our method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.12553v1-abstract-full').style.display = 'none'; document.getElementById('2210.12553v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to BlackboxNLP 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.08783">arXiv:2206.08783</a> <span> [<a href="https://arxiv.org/pdf/2206.08783">pdf</a>, <a href="https://arxiv.org/format/2206.08783">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.48550/arXiv.2206.08783">10.48550/arXiv.2206.08783 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> A Human-Centric Method for Generating Causal Explanations in Natural Language for Autonomous Vehicle Motion Planning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gyevnar%2C+B">Balint Gyevnar</a>, <a href="/search/cs?searchtype=author&query=Tamborski%2C+M">Massimiliano Tamborski</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Cheng Wang</a>, <a href="/search/cs?searchtype=author&query=Lucas%2C+C+G">Christopher G. Lucas</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a>, <a href="/search/cs?searchtype=author&query=Albrecht%2C+S+V">Stefano V. Albrecht</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.08783v2-abstract-short" style="display: inline;"> Inscrutable AI systems are difficult to trust, especially if they operate in safety-critical settings like autonomous driving. Therefore, there is a need to build transparent and queryable systems to increase trust levels. We propose a transparent, human-centric explanation generation method for autonomous vehicle motion planning and prediction based on an existing white-box system called IGP2. Ou… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.08783v2-abstract-full').style.display = 'inline'; document.getElementById('2206.08783v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.08783v2-abstract-full" style="display: none;"> Inscrutable AI systems are difficult to trust, especially if they operate in safety-critical settings like autonomous driving. Therefore, there is a need to build transparent and queryable systems to increase trust levels. We propose a transparent, human-centric explanation generation method for autonomous vehicle motion planning and prediction based on an existing white-box system called IGP2. Our method integrates Bayesian networks with context-free generative rules and can give causal natural language explanations for the high-level driving behaviour of autonomous vehicles. Preliminary testing on simulated scenarios shows that our method captures the causes behind the actions of autonomous vehicles and generates intelligible explanations with varying complexity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.08783v2-abstract-full').style.display = 'none'; document.getElementById('2206.08783v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">IJCAI Workshop on Artificial Intelligence for Autonomous Driving (AI4AD), 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.12486">arXiv:2205.12486</a> <span> [<a href="https://arxiv.org/pdf/2205.12486">pdf</a>, <a href="https://arxiv.org/format/2205.12486">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Factorizing Content and Budget Decisions in Abstractive Summarization of Long Documents </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fonseca%2C+M">Marcio Fonseca</a>, <a href="/search/cs?searchtype=author&query=Ziser%2C+Y">Yftah Ziser</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.12486v2-abstract-short" style="display: inline;"> We argue that disentangling content selection from the budget used to cover salient content improves the performance and applicability of abstractive summarizers. Our method, FactorSum, does this disentanglement by factorizing summarization into two steps through an energy function: (1) generation of abstractive summary views; (2) combination of these views into a final summary, following a budget… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.12486v2-abstract-full').style.display = 'inline'; document.getElementById('2205.12486v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.12486v2-abstract-full" style="display: none;"> We argue that disentangling content selection from the budget used to cover salient content improves the performance and applicability of abstractive summarizers. Our method, FactorSum, does this disentanglement by factorizing summarization into two steps through an energy function: (1) generation of abstractive summary views; (2) combination of these views into a final summary, following a budget and content guidance. This guidance may come from different sources, including from an advisor model such as BART or BigBird, or in oracle mode -- from the reference. This factorization achieves significantly higher ROUGE scores on multiple benchmarks for long document summarization, namely PubMed, arXiv, and GovReport. Most notably, our model is effective for domain adaptation. When trained only on PubMed samples, it achieves a 46.29 ROUGE-1 score on arXiv, which indicates a strong performance due to more flexible budget adaptation and content selection less dependent on domain-specific textual structure. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.12486v2-abstract-full').style.display = 'none'; document.getElementById('2205.12486v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP 2022 camera ready</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.10192">arXiv:2205.10192</a> <span> [<a href="https://arxiv.org/pdf/2205.10192">pdf</a>, <a href="https://arxiv.org/format/2205.10192">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1613/jair.1.15191">10.1613/jair.1.15191 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> On the Trade-off between Redundancy and Local Coherence in Summarization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cardenas%2C+R">Ronald Cardenas</a>, <a href="/search/cs?searchtype=author&query=Galle%2C+M">Matthias Galle</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.10192v2-abstract-short" style="display: inline;"> Extractive summaries are usually presented as lists of sentences with no expected cohesion between them and with plenty of redundant information if not accounted for. In this paper, we investigate the trade-offs incurred when aiming to control for inter-sentential cohesion and redundancy in extracted summaries, and their impact on their informativeness. As case study, we focus on the summarization… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.10192v2-abstract-full').style.display = 'inline'; document.getElementById('2205.10192v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.10192v2-abstract-full" style="display: none;"> Extractive summaries are usually presented as lists of sentences with no expected cohesion between them and with plenty of redundant information if not accounted for. In this paper, we investigate the trade-offs incurred when aiming to control for inter-sentential cohesion and redundancy in extracted summaries, and their impact on their informativeness. As case study, we focus on the summarization of long, highly redundant documents and consider two optimization scenarios, reward-guided and with no supervision. In the reward-guided scenario, we compare systems that control for redundancy and cohesion during sentence scoring. In the unsupervised scenario, we introduce two systems that aim to control all three properties -- informativeness, redundancy, and cohesion -- in a principled way. Both systems implement a psycholinguistic theory that simulates how humans keep track of relevant content units and how cohesion and non-redundancy constraints are applied in short-term memory during reading. Extensive automatic and human evaluations reveal that systems optimizing for -- among other properties -- cohesion are capable of better organizing content in summaries compared to systems that optimize only for redundancy, while maintaining comparable informativeness. We find that the proposed unsupervised systems manage to extract highly cohesive summaries across varying levels of document redundancy, although sacrificing informativeness in the process. Finally, we lay evidence as to how simulated cognitive processes impact the trade-off between the analyzed summary properties. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.10192v2-abstract-full').style.display = 'none'; document.getElementById('2205.10192v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to JAIR</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Journal of Artificial Intelligence Research, 80, 273-326 (2024) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.07893">arXiv:2203.07893</a> <span> [<a href="https://arxiv.org/pdf/2203.07893">pdf</a>, <a href="https://arxiv.org/format/2203.07893">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Gold Doesn't Always Glitter: Spectral Removal of Linear and Nonlinear Guarded Attribute Information </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shao%2C+S">Shun Shao</a>, <a href="/search/cs?searchtype=author&query=Ziser%2C+Y">Yftah Ziser</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.07893v4-abstract-short" style="display: inline;"> We describe a simple and effective method (Spectral Attribute removaL; SAL) to remove private or guarded information from neural representations. Our method uses matrix decomposition to project the input representations into directions with reduced covariance with the guarded information rather than maximal covariance as factorization methods normally use. We begin with linear information removal… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.07893v4-abstract-full').style.display = 'inline'; document.getElementById('2203.07893v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.07893v4-abstract-full" style="display: none;"> We describe a simple and effective method (Spectral Attribute removaL; SAL) to remove private or guarded information from neural representations. Our method uses matrix decomposition to project the input representations into directions with reduced covariance with the guarded information rather than maximal covariance as factorization methods normally use. We begin with linear information removal and proceed to generalize our algorithm to the case of nonlinear information removal using kernels. Our experiments demonstrate that our algorithm retains better main task performance after removing the guarded information compared to previous work. In addition, our experiments demonstrate that we need a relatively small amount of guarded attribute data to remove information about these attributes, which lowers the exposure to sensitive data and is more suitable for low-resource scenarios. Code is available at https://github.com/jasonshaoshun/SAL. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.07893v4-abstract-full').style.display = 'none'; document.getElementById('2203.07893v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to the Conference of the European Chapter of the Association for Computational Linguistics (EACL), 2023; 12 pages (minor formatting corrections)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.02283">arXiv:2110.02283</a> <span> [<a href="https://arxiv.org/pdf/2110.02283">pdf</a>, <a href="https://arxiv.org/format/2110.02283">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Co-training an Unsupervised Constituency Parser with Weak Supervision </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Maveli%2C+N">Nickil Maveli</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.02283v2-abstract-short" style="display: inline;"> We introduce a method for unsupervised parsing that relies on bootstrapping classifiers to identify if a node dominates a specific span in a sentence. There are two types of classifiers, an inside classifier that acts on a span, and an outside classifier that acts on everything outside of a given span. Through self-training and co-training with the two classifiers, we show that the interplay betwe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.02283v2-abstract-full').style.display = 'inline'; document.getElementById('2110.02283v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.02283v2-abstract-full" style="display: none;"> We introduce a method for unsupervised parsing that relies on bootstrapping classifiers to identify if a node dominates a specific span in a sentence. There are two types of classifiers, an inside classifier that acts on a span, and an outside classifier that acts on everything outside of a given span. Through self-training and co-training with the two classifiers, we show that the interplay between them helps improve the accuracy of both, and as a result, effectively parse. A seed bootstrapping technique prepares the data to train these classifiers. Our analyses further validate that such an approach in conjunction with weak supervision using prior branching knowledge of a known language (left/right-branching) and minimal heuristics injects strong inductive bias into the parser, achieving 63.1 F$_1$ on the English (PTB) test set. In addition, we show the effectiveness of our architecture by evaluating on treebanks for Chinese (CTB) and Japanese (KTB) and achieve new state-of-the-art results. Our code and pre-trained models are available at https://github.com/Nickil21/weakly-supervised-parsing. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.02283v2-abstract-full').style.display = 'none'; document.getElementById('2110.02283v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to Findings of ACL 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2104.08392">arXiv:2104.08392</a> <span> [<a href="https://arxiv.org/pdf/2104.08392">pdf</a>, <a href="https://arxiv.org/format/2104.08392">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Unsupervised Extractive Summarization by Human Memory Simulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cardenas%2C+R">Ronald Cardenas</a>, <a href="/search/cs?searchtype=author&query=Galle%2C+M">Matthias Galle</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2104.08392v1-abstract-short" style="display: inline;"> Summarization systems face the core challenge of identifying and selecting important information. In this paper, we tackle the problem of content selection in unsupervised extractive summarization of long, structured documents. We introduce a wide range of heuristics that leverage cognitive representations of content units and how these are retained or forgotten in human memory. We find that prope… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.08392v1-abstract-full').style.display = 'inline'; document.getElementById('2104.08392v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2104.08392v1-abstract-full" style="display: none;"> Summarization systems face the core challenge of identifying and selecting important information. In this paper, we tackle the problem of content selection in unsupervised extractive summarization of long, structured documents. We introduce a wide range of heuristics that leverage cognitive representations of content units and how these are retained or forgotten in human memory. We find that properties of these representations of human memory can be exploited to capture relevance of content units in scientific articles. Experiments show that our proposed heuristics are effective at leveraging cognitive structures and the organization of the document (i.e.\ sections of an article), and automatic and human evaluations provide strong evidence that these heuristics extract more summary-worthy content units. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2104.08392v1-abstract-full').style.display = 'none'; document.getElementById('2104.08392v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2102.02110">arXiv:2102.02110</a> <span> [<a href="https://arxiv.org/pdf/2102.02110">pdf</a>, <a href="https://arxiv.org/format/2102.02110">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Learning to Match Mathematical Statements with Proofs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Coavoux%2C+M">Maximin Coavoux</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2102.02110v1-abstract-short" style="display: inline;"> We introduce a novel task consisting in assigning a proof to a given mathematical statement. The task is designed to improve the processing of research-level mathematical texts. Applying Natural Language Processing (NLP) tools to research level mathematical articles is both challenging, since it is a highly specialized domain which mixes natural language and mathematical formulae. It is also an im… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.02110v1-abstract-full').style.display = 'inline'; document.getElementById('2102.02110v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2102.02110v1-abstract-full" style="display: none;"> We introduce a novel task consisting in assigning a proof to a given mathematical statement. The task is designed to improve the processing of research-level mathematical texts. Applying Natural Language Processing (NLP) tools to research level mathematical articles is both challenging, since it is a highly specialized domain which mixes natural language and mathematical formulae. It is also an important requirement for developing tools for mathematical information retrieval and computer-assisted theorem proving. We release a dataset for the task, consisting of over 180k statement-proof pairs extracted from mathematical research articles. We carry out preliminary experiments to assess the difficulty of the task. We first experiment with two bag-of-words baselines. We show that considering the assignment problem globally and using weighted bipartite matching algorithms helps a lot in tackling the task. Finally, we introduce a self-attention-based model that can be trained either locally or globally and outperforms baselines by a wide margin. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.02110v1-abstract-full').style.display = 'none'; document.getElementById('2102.02110v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2101.06803">arXiv:2101.06803</a> <span> [<a href="https://arxiv.org/pdf/2101.06803">pdf</a>, <a href="https://arxiv.org/format/2101.06803">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Narration Generation for Cartoon Videos </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Papasarantopoulos%2C+N">Nikos Papasarantopoulos</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2101.06803v1-abstract-short" style="display: inline;"> Research on text generation from multimodal inputs has largely focused on static images, and less on video data. In this paper, we propose a new task, narration generation, that is complementing videos with narration texts that are to be interjected in several places. The narrations are part of the video and contribute to the storyline unfolding in it. Moreover, they are context-informed, since th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2101.06803v1-abstract-full').style.display = 'inline'; document.getElementById('2101.06803v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2101.06803v1-abstract-full" style="display: none;"> Research on text generation from multimodal inputs has largely focused on static images, and less on video data. In this paper, we propose a new task, narration generation, that is complementing videos with narration texts that are to be interjected in several places. The narrations are part of the video and contribute to the storyline unfolding in it. Moreover, they are context-informed, since they include information appropriate for the timeframe of video they cover, and also, do not need to include every detail shown in input scenes, as a caption would. We collect a new dataset from the animated television series Peppa Pig. Furthermore, we formalize the task of narration generation as including two separate tasks, timing and content generation, and present a set of models on the new task. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2101.06803v1-abstract-full').style.display = 'none'; document.getElementById('2101.06803v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 January, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.12676">arXiv:2010.12676</a> <span> [<a href="https://arxiv.org/pdf/2010.12676">pdf</a>, <a href="https://arxiv.org/format/2010.12676">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> A Differentiable Relaxation of Graph Segmentation and Alignment for AMR Parsing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lyu%2C+C">Chunchuan Lyu</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a>, <a href="/search/cs?searchtype=author&query=Titov%2C+I">Ivan Titov</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.12676v2-abstract-short" style="display: inline;"> Abstract Meaning Representations (AMR) are a broad-coverage semantic formalism which represents sentence meaning as a directed acyclic graph. To train most AMR parsers, one needs to segment the graph into subgraphs and align each such subgraph to a word in a sentence; this is normally done at preprocessing, relying on hand-crafted rules. In contrast, we treat both alignment and segmentation as lat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.12676v2-abstract-full').style.display = 'inline'; document.getElementById('2010.12676v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.12676v2-abstract-full" style="display: none;"> Abstract Meaning Representations (AMR) are a broad-coverage semantic formalism which represents sentence meaning as a directed acyclic graph. To train most AMR parsers, one needs to segment the graph into subgraphs and align each such subgraph to a word in a sentence; this is normally done at preprocessing, relying on hand-crafted rules. In contrast, we treat both alignment and segmentation as latent variables in our model and induce them as part of end-to-end training. As marginalizing over the structured latent variables is infeasible, we use the variational autoencoding framework. To ensure end-to-end differentiable optimization, we introduce a differentiable relaxation of the segmentation and alignment problems. We observe that inducing segmentation yields substantial gains over using a `greedy' segmentation heuristic. The performance of our method also approaches that of a model that relies on the segmentation rules of \citet{lyu-titov-2018-amr}, which were hand-crafted to handle individual AMR constructions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.12676v2-abstract-full').style.display = 'none'; document.getElementById('2010.12676v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.04383">arXiv:2010.04383</a> <span> [<a href="https://arxiv.org/pdf/2010.04383">pdf</a>, <a href="https://arxiv.org/format/2010.04383">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Lightweight, Dynamic Graph Convolutional Networks for AMR-to-Text Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yan Zhang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Z">Zhijiang Guo</a>, <a href="/search/cs?searchtype=author&query=Teng%2C+Z">Zhiyang Teng</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+W">Wei Lu</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zuozhu Liu</a>, <a href="/search/cs?searchtype=author&query=Bing%2C+L">Lidong Bing</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.04383v1-abstract-short" style="display: inline;"> AMR-to-text generation is used to transduce Abstract Meaning Representation structures (AMR) into text. A key challenge in this task is to efficiently learn effective graph representations. Previously, Graph Convolution Networks (GCNs) were used to encode input AMRs, however, vanilla GCNs are not able to capture non-local information and additionally, they follow a local (first-order) information… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.04383v1-abstract-full').style.display = 'inline'; document.getElementById('2010.04383v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.04383v1-abstract-full" style="display: none;"> AMR-to-text generation is used to transduce Abstract Meaning Representation structures (AMR) into text. A key challenge in this task is to efficiently learn effective graph representations. Previously, Graph Convolution Networks (GCNs) were used to encode input AMRs, however, vanilla GCNs are not able to capture non-local information and additionally, they follow a local (first-order) information aggregation scheme. To account for these issues, larger and deeper GCN models are required to capture more complex interactions. In this paper, we introduce a dynamic fusion mechanism, proposing Lightweight Dynamic Graph Convolutional Networks (LDGCNs) that capture richer non-local interactions by synthesizing higher order information from the input graphs. We further develop two novel parameter saving strategies based on the group graph convolutions and weight tied convolutions to reduce memory usage and model complexity. With the help of these strategies, we are able to train a model with fewer parameters while maintaining the model capacity. Experiments demonstrate that LDGCNs outperform state-of-the-art models on two benchmark datasets for AMR-to-text generation with significantly fewer parameters. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.04383v1-abstract-full').style.display = 'none'; document.getElementById('2010.04383v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to EMNLP 2020, long paper</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2009.13312">arXiv:2009.13312</a> <span> [<a href="https://arxiv.org/pdf/2009.13312">pdf</a>, <a href="https://arxiv.org/format/2009.13312">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Reducing Quantity Hallucinations in Abstractive Summarization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+Z">Zheng Zhao</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a>, <a href="/search/cs?searchtype=author&query=Webber%2C+B">Bonnie Webber</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2009.13312v1-abstract-short" style="display: inline;"> It is well-known that abstractive summaries are subject to hallucination---including material that is not supported by the original text. While summaries can be made hallucination-free by limiting them to general phrases, such summaries would fail to be very informative. Alternatively, one can try to avoid hallucinations by verifying that any specific entities in the summary appear in the original… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.13312v1-abstract-full').style.display = 'inline'; document.getElementById('2009.13312v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2009.13312v1-abstract-full" style="display: none;"> It is well-known that abstractive summaries are subject to hallucination---including material that is not supported by the original text. While summaries can be made hallucination-free by limiting them to general phrases, such summaries would fail to be very informative. Alternatively, one can try to avoid hallucinations by verifying that any specific entities in the summary appear in the original text in a similar context. This is the approach taken by our system, Herman. The system learns to recognize and verify quantity entities (dates, numbers, sums of money, etc.) in a beam-worth of abstractive summaries produced by state-of-the-art models, in order to up-rank those summaries whose quantity terms are supported by the original text. Experimental results demonstrate that the ROUGE scores of such up-ranked summaries have a higher Precision than summaries that have not been up-ranked, without a comparable loss in Recall, resulting in higher F$_1$. Preliminary human evaluation of up-ranked vs. original summaries shows people's preference for the former. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.13312v1-abstract-full').style.display = 'none'; document.getElementById('2009.13312v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 September, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to Findings of EMNLP 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2008.07648">arXiv:2008.07648</a> <span> [<a href="https://arxiv.org/pdf/2008.07648">pdf</a>, <a href="https://arxiv.org/format/2008.07648">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Nonparametric Learning of Two-Layer ReLU Residual Units </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhunxuan Wang</a>, <a href="/search/cs?searchtype=author&query=He%2C+L">Linyun He</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+C">Chunchuan Lyu</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2008.07648v3-abstract-short" style="display: inline;"> We describe an algorithm that learns two-layer residual units using rectified linear unit (ReLU) activation: suppose the input $\mathbf{x}$ is from a distribution with support space $\mathbb{R}^d$ and the ground-truth generative model is a residual unit of this type, given by $\mathbf{y} = \boldsymbol{B}^\ast\left[\left(\boldsymbol{A}^\ast\mathbf{x}\right)^+ + \mathbf{x}\right]$, where ground-trut… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2008.07648v3-abstract-full').style.display = 'inline'; document.getElementById('2008.07648v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2008.07648v3-abstract-full" style="display: none;"> We describe an algorithm that learns two-layer residual units using rectified linear unit (ReLU) activation: suppose the input $\mathbf{x}$ is from a distribution with support space $\mathbb{R}^d$ and the ground-truth generative model is a residual unit of this type, given by $\mathbf{y} = \boldsymbol{B}^\ast\left[\left(\boldsymbol{A}^\ast\mathbf{x}\right)^+ + \mathbf{x}\right]$, where ground-truth network parameters $\boldsymbol{A}^\ast \in \mathbb{R}^{d\times d}$ represent a full-rank matrix with nonnegative entries and $\boldsymbol{B}^\ast \in \mathbb{R}^{m\times d}$ is full-rank with $m \geq d$ and for $\boldsymbol{c} \in \mathbb{R}^d$, $[\boldsymbol{c}^{+}]_i = \max\{0, c_i\}$. We design layer-wise objectives as functionals whose analytic minimizers express the exact ground-truth network in terms of its parameters and nonlinearities. Following this objective landscape, learning residual units from finite samples can be formulated using convex optimization of a nonparametric function: for each layer, we first formulate the corresponding empirical risk minimization (ERM) as a positive semi-definite quadratic program (QP), then we show the solution space of the QP can be equivalently determined by a set of linear inequalities, which can then be efficiently solved by linear programming (LP). We further prove the strong statistical consistency of our algorithm, and demonstrate its robustness and sample efficiency through experimental results on synthetic data and a set of benchmark regression datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2008.07648v3-abstract-full').style.display = 'none'; document.getElementById('2008.07648v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 August, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in Transactions on Machine Learning Research (11/2022), slightly typographically revised</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2004.11054">arXiv:2004.11054</a> <span> [<a href="https://arxiv.org/pdf/2004.11054">pdf</a>, <a href="https://arxiv.org/format/2004.11054">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> </div> <p class="title is-5 mathjax"> Learning Dialog Policies from Weak Demonstrations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gordon-Hall%2C+G">Gabriel Gordon-Hall</a>, <a href="/search/cs?searchtype=author&query=Gorinski%2C+P+J">Philip John Gorinski</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2004.11054v2-abstract-short" style="display: inline;"> Deep reinforcement learning is a promising approach to training a dialog manager, but current methods struggle with the large state and action spaces of multi-domain dialog systems. Building upon Deep Q-learning from Demonstrations (DQfD), an algorithm that scores highly in difficult Atari games, we leverage dialog data to guide the agent to successfully respond to a user's requests. We make progr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.11054v2-abstract-full').style.display = 'inline'; document.getElementById('2004.11054v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2004.11054v2-abstract-full" style="display: none;"> Deep reinforcement learning is a promising approach to training a dialog manager, but current methods struggle with the large state and action spaces of multi-domain dialog systems. Building upon Deep Q-learning from Demonstrations (DQfD), an algorithm that scores highly in difficult Atari games, we leverage dialog data to guide the agent to successfully respond to a user's requests. We make progressively fewer assumptions about the data needed, using labeled, reduced-labeled, and even unlabeled data to train expert demonstrators. We introduce Reinforced Fine-tune Learning, an extension to DQfD, enabling us to overcome the domain gap between the datasets and the environment. Experiments in a challenging multi-domain dialog system framework validate our approaches, and get high success rates even when trained on out-of-domain data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.11054v2-abstract-full').style.display = 'none'; document.getElementById('2004.11054v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 August, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 April, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages + 2 pages references + 1 page appendices, 6 figures, 2 tables, 1 algorithm, accepted as long paper at ACL2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2004.02995">arXiv:2004.02995</a> <span> [<a href="https://arxiv.org/pdf/2004.02995">pdf</a>, <a href="https://arxiv.org/ps/2004.02995">ps</a>, <a href="https://arxiv.org/format/2004.02995">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.18653/v1/2020.emnlp-main.245">10.18653/v1/2020.emnlp-main.245 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Multi-Step Inference for Reasoning Over Paragraphs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jiangming Liu</a>, <a href="/search/cs?searchtype=author&query=Gardner%2C+M">Matt Gardner</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a>, <a href="/search/cs?searchtype=author&query=Lapata%2C+M">Mirella Lapata</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2004.02995v2-abstract-short" style="display: inline;"> Complex reasoning over text requires understanding and chaining together free-form predicates and logical connectives. Prior work has largely tried to do this either symbolically or with black-box transformers. We present a middle ground between these two extremes: a compositional model reminiscent of neural module networks that can perform chained logical reasoning. This model first finds relevan… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.02995v2-abstract-full').style.display = 'inline'; document.getElementById('2004.02995v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2004.02995v2-abstract-full" style="display: none;"> Complex reasoning over text requires understanding and chaining together free-form predicates and logical connectives. Prior work has largely tried to do this either symbolically or with black-box transformers. We present a middle ground between these two extremes: a compositional model reminiscent of neural module networks that can perform chained logical reasoning. This model first finds relevant sentences in the context and then chains them together using neural modules. Our model gives significant performance improvements (up to 29\% relative error reduction when comfibined with a reranker) on ROPES, a recently introduced complex reasoning dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.02995v2-abstract-full').style.display = 'none'; document.getElementById('2004.02995v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 April, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted by EMNLP 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2002.01365">arXiv:2002.01365</a> <span> [<a href="https://arxiv.org/pdf/2002.01365">pdf</a>, <a href="https://arxiv.org/format/2002.01365">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Compositional Languages Emerge in a Neural Iterated Learning Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ren%2C+Y">Yi Ren</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+S">Shangmin Guo</a>, <a href="/search/cs?searchtype=author&query=Labeau%2C+M">Matthieu Labeau</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a>, <a href="/search/cs?searchtype=author&query=Kirby%2C+S">Simon Kirby</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2002.01365v2-abstract-short" style="display: inline;"> The principle of compositionality, which enables natural language to represent complex concepts via a structured combination of simpler ones, allows us to convey an open-ended set of messages using a limited vocabulary. If compositionality is indeed a natural property of language, we may expect it to appear in communication protocols that are created by neural agents in language games. In this pap… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.01365v2-abstract-full').style.display = 'inline'; document.getElementById('2002.01365v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2002.01365v2-abstract-full" style="display: none;"> The principle of compositionality, which enables natural language to represent complex concepts via a structured combination of simpler ones, allows us to convey an open-ended set of messages using a limited vocabulary. If compositionality is indeed a natural property of language, we may expect it to appear in communication protocols that are created by neural agents in language games. In this paper, we propose an effective neural iterated learning (NIL) algorithm that, when applied to interacting neural agents, facilitates the emergence of a more structured type of language. Indeed, these languages provide learning speed advantages to neural agents during training, which can be incrementally amplified via NIL. We provide a probabilistic model of NIL and an explanation of why the advantage of compositional language exist. Our experiments confirm our analysis, and also demonstrate that the emerged languages largely improve the generalizing power of the neural agent communication. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.01365v2-abstract-full').style.display = 'none'; document.getElementById('2002.01365v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 February, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted by ICLR-2020</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> ICLR-2020 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1909.03285">arXiv:1909.03285</a> <span> [<a href="https://arxiv.org/pdf/1909.03285">pdf</a>, <a href="https://arxiv.org/format/1909.03285">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Semantic Role Labeling with Iterative Structure Refinement </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lyu%2C+C">Chunchuan Lyu</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a>, <a href="/search/cs?searchtype=author&query=Titov%2C+I">Ivan Titov</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1909.03285v1-abstract-short" style="display: inline;"> Modern state-of-the-art Semantic Role Labeling (SRL) methods rely on expressive sentence encoders (e.g., multi-layer LSTMs) but tend to model only local (if any) interactions between individual argument labeling decisions. This contrasts with earlier work and also with the intuition that the labels of individual arguments are strongly interdependent. We model interactions between argument labeling… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1909.03285v1-abstract-full').style.display = 'inline'; document.getElementById('1909.03285v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1909.03285v1-abstract-full" style="display: none;"> Modern state-of-the-art Semantic Role Labeling (SRL) methods rely on expressive sentence encoders (e.g., multi-layer LSTMs) but tend to model only local (if any) interactions between individual argument labeling decisions. This contrasts with earlier work and also with the intuition that the labels of individual arguments are strongly interdependent. We model interactions between argument labeling decisions through {\it iterative refinement}. Starting with an output produced by a factorized model, we iteratively refine it using a refinement network. Instead of modeling arbitrary interactions among roles and words, we encode prior knowledge about the SRL problem by designing a restricted network architecture capturing non-local interactions. This modeling choice prevents overfitting and results in an effective model, outperforming strong factorized baseline models on all 7 CoNLL-2009 languages, and achieving state-of-the-art results on 5 of them, including English. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1909.03285v1-abstract-full').style.display = 'none'; document.getElementById('1909.03285v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 September, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> EMNLP 2019 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1907.08722">arXiv:1907.08722</a> <span> [<a href="https://arxiv.org/pdf/1907.08722">pdf</a>, <a href="https://arxiv.org/ps/1907.08722">ps</a>, <a href="https://arxiv.org/format/1907.08722">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> What is this Article about? Extreme Summarization with Topic-aware Convolutional Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Narayan%2C+S">Shashi Narayan</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a>, <a href="/search/cs?searchtype=author&query=Lapata%2C+M">Mirella Lapata</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1907.08722v1-abstract-short" style="display: inline;"> We introduce 'extreme summarization', a new single-document summarization task which aims at creating a short, one-sentence news summary answering the question ``What is the article about?''. We argue that extreme summarization, by nature, is not amenable to extractive strategies and requires an abstractive modeling approach. In the hope of driving research on this task further: (a) we collect a r… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1907.08722v1-abstract-full').style.display = 'inline'; document.getElementById('1907.08722v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1907.08722v1-abstract-full" style="display: none;"> We introduce 'extreme summarization', a new single-document summarization task which aims at creating a short, one-sentence news summary answering the question ``What is the article about?''. We argue that extreme summarization, by nature, is not amenable to extractive strategies and requires an abstractive modeling approach. In the hope of driving research on this task further: (a) we collect a real-world, large scale dataset by harvesting online articles from the British Broadcasting Corporation (BBC); and (b) propose a novel abstractive model which is conditioned on the article's topics and based entirely on convolutional neural networks. We demonstrate experimentally that this architecture captures long-range dependencies in a document and recognizes pertinent content, outperforming an oracle extractive system and state-of-the-art abstractive approaches when evaluated automatically and by humans on the extreme summarization dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1907.08722v1-abstract-full').style.display = 'none'; document.getElementById('1907.08722v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 July, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to appear in Journal of Artificial Intelligence Research (JAIR), 37 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1904.09585">arXiv:1904.09585</a> <span> [<a href="https://arxiv.org/pdf/1904.09585">pdf</a>, <a href="https://arxiv.org/format/1904.09585">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Obfuscation for Privacy-preserving Syntactic Parsing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+Z">Zhifeng Hu</a>, <a href="/search/cs?searchtype=author&query=Havrylov%2C+S">Serhii Havrylov</a>, <a href="/search/cs?searchtype=author&query=Titov%2C+I">Ivan Titov</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1904.09585v2-abstract-short" style="display: inline;"> The goal of homomorphic encryption is to encrypt data such that another party can operate on it without being explicitly exposed to the content of the original data. We introduce an idea for a privacy-preserving transformation on natural language data, inspired by homomorphic encryption. Our primary tool is {\em obfuscation}, relying on the properties of natural language. Specifically, a given Eng… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1904.09585v2-abstract-full').style.display = 'inline'; document.getElementById('1904.09585v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1904.09585v2-abstract-full" style="display: none;"> The goal of homomorphic encryption is to encrypt data such that another party can operate on it without being explicitly exposed to the content of the original data. We introduce an idea for a privacy-preserving transformation on natural language data, inspired by homomorphic encryption. Our primary tool is {\em obfuscation}, relying on the properties of natural language. Specifically, a given English text is obfuscated using a neural model that aims to preserve the syntactic relationships of the original sentence so that the obfuscated sentence can be parsed instead of the original one. The model works at the word level, and learns to obfuscate each word separately by changing it into a new word that has a similar syntactic role. The text obfuscated by our model leads to better performance on three syntactic parsers (two dependency and one constituency parsers) in comparison to an upper-bound random substitution baseline. More specifically, the results demonstrate that as more terms are obfuscated (by their part of speech), the substitution upper bound significantly degrades, while the neural model maintains a relatively high performing parser. All of this is done without much sacrifice of privacy compared to the random substitution upper bound. We also further analyze the results, and discover that the substituted words have similar syntactic properties, but different semantic content, compared to the original words. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1904.09585v2-abstract-full').style.display = 'none'; document.getElementById('1904.09585v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 May, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 April, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to IWPT 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1904.02020">arXiv:1904.02020</a> <span> [<a href="https://arxiv.org/pdf/1904.02020">pdf</a>, <a href="https://arxiv.org/format/1904.02020">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Jointly Extracting and Compressing Documents with Summary State Representations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mendes%2C+A">Afonso Mendes</a>, <a href="/search/cs?searchtype=author&query=Narayan%2C+S">Shashi Narayan</a>, <a href="/search/cs?searchtype=author&query=Miranda%2C+S">Sebasti茫o Miranda</a>, <a href="/search/cs?searchtype=author&query=Marinho%2C+Z">Zita Marinho</a>, <a href="/search/cs?searchtype=author&query=Martins%2C+A+F+T">Andr茅 F. T. Martins</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1904.02020v2-abstract-short" style="display: inline;"> We present a new neural model for text summarization that first extracts sentences from a document and then compresses them. The proposed model offers a balance that sidesteps the difficulties in abstractive methods while generating more concise summaries than extractive methods. In addition, our model dynamically determines the length of the output summary based on the gold summaries it observes… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1904.02020v2-abstract-full').style.display = 'inline'; document.getElementById('1904.02020v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1904.02020v2-abstract-full" style="display: none;"> We present a new neural model for text summarization that first extracts sentences from a document and then compresses them. The proposed model offers a balance that sidesteps the difficulties in abstractive methods while generating more concise summaries than extractive methods. In addition, our model dynamically determines the length of the output summary based on the gold summaries it observes during training and does not require length constraints typical to extractive summarization. The model achieves state-of-the-art results on the CNN/DailyMail and Newsroom datasets, improving over current extractive and abstractive methods. Human evaluations demonstrate that our model generates concise and informative summaries. We also make available a new dataset of oracle compressive summaries derived automatically from the CNN/DailyMail reference summaries. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1904.02020v2-abstract-full').style.display = 'none'; document.getElementById('1904.02020v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 April, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 April, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> NAACL 2019 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1904.00615">arXiv:1904.00615</a> <span> [<a href="https://arxiv.org/pdf/1904.00615">pdf</a>, <a href="https://arxiv.org/format/1904.00615">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Discontinuous Constituency Parsing with a Stack-Free Transition System and a Dynamic Oracle </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Coavoux%2C+M">Maximin Coavoux</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1904.00615v1-abstract-short" style="display: inline;"> We introduce a novel transition system for discontinuous constituency parsing. Instead of storing subtrees in a stack --i.e. a data structure with linear-time sequential access-- the proposed system uses a set of parsing items, with constant-time random access. This change makes it possible to construct any discontinuous constituency tree in exactly $4n - 2$ transitions for a sentence of length… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1904.00615v1-abstract-full').style.display = 'inline'; document.getElementById('1904.00615v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1904.00615v1-abstract-full" style="display: none;"> We introduce a novel transition system for discontinuous constituency parsing. Instead of storing subtrees in a stack --i.e. a data structure with linear-time sequential access-- the proposed system uses a set of parsing items, with constant-time random access. This change makes it possible to construct any discontinuous constituency tree in exactly $4n - 2$ transitions for a sentence of length $n$. At each parsing step, the parser considers every item in the set to be combined with a focus item and to construct a new constituent in a bottom-up fashion. The parsing strategy is based on the assumption that most syntactic structures can be parsed incrementally and that the set --the memory of the parser-- remains reasonably small on average. Moreover, we introduce a provably correct dynamic oracle for the new transition system, and present the first experiments in discontinuous constituency parsing using a dynamic oracle. Our parser obtains state-of-the-art results on three English and German discontinuous treebanks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1904.00615v1-abstract-full').style.display = 'none'; document.getElementById('1904.00615v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 April, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for publication at NAACL 2019; 14 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1903.11410">arXiv:1903.11410</a> <span> [<a href="https://arxiv.org/pdf/1903.11410">pdf</a>, <a href="https://arxiv.org/ps/1903.11410">ps</a>, <a href="https://arxiv.org/format/1903.11410">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Structural Neural Encoders for AMR-to-text Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Damonte%2C+M">Marco Damonte</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+S+B">Shay B. Cohen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1903.11410v2-abstract-short" style="display: inline;"> AMR-to-text generation is a problem recently introduced to the NLP community, in which the goal is to generate sentences from Abstract Meaning Representation (AMR) graphs. Sequence-to-sequence models can be used to this end by converting the AMR graphs to strings. Approaching the problem while working directly with graphs requires the use of graph-to-sequence models that encode the AMR graph into… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1903.11410v2-abstract-full').style.display = 'inline'; document.getElementById('1903.11410v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1903.11410v2-abstract-full" style="display: none;"> AMR-to-text generation is a problem recently introduced to the NLP community, in which the goal is to generate sentences from Abstract Meaning Representation (AMR) graphs. Sequence-to-sequence models can be used to this end by converting the AMR graphs to strings. Approaching the problem while working directly with graphs requires the use of graph-to-sequence models that encode the AMR graph into a vector representation. Such encoding has been shown to be beneficial in the past, and unlike sequential encoding, it allows us to explicitly capture reentrant structures in the AMR graphs. We investigate the extent to which reentrancies (nodes with multiple parents) have an impact on AMR-to-text generation by comparing graph encoders to tree encoders, where reentrancies are not preserved. We show that improvements in the treatment of reentrancies and long-range dependencies contribute to higher overall scores for graph encoders. Our best model achieves 24.40 BLEU on LDC2015E86, outperforming the state of the art by 1.1 points and 24.54 BLEU on LDC2017T10, outperforming the state of the art by 1.24 points. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1903.11410v2-abstract-full').style.display = 'none'; document.getElementById('1903.11410v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 May, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 March, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Proceedings of NAACL 2019</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Cohen%2C+S+B&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Cohen%2C+S+B&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Cohen%2C+S+B&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository