CINXE.COM

Search | arXiv e-print repository

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8"/>
  <meta name="viewport" content="width=device-width, initial-scale=1"/>
  <!-- new favicon config and versions by realfavicongenerator.net -->
  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png">
  <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png">
  <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png">
  <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest">
  <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b">
  <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico">
  <meta name="msapplication-TileColor" content="#b31b1b">
  <!-- NOTE(review): unlike the other icon assets this path is relative to the page URL; confirm it resolves. -->
  <meta name="msapplication-config" content="images/icons/browserconfig.xml">
  <meta name="theme-color" content="#b31b1b">
  <!-- end favicon config -->
  <title>Search | arXiv e-print repository</title>
  <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script>
  <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" />
  <script type="text/x-mathjax-config">
    // MathJax 2 configuration block; it must precede the MathJax.js loader below.
    // NOTE(review): ignoreClass '.*' together with processClass 'mathjax.*' is presumably
    // meant to limit typesetting to elements with a "mathjax" class; confirm against the
    // tex2jax documentation.
    MathJax.Hub.Config({
      messageStyle: "none",
      extensions: ["tex2jax.js"],
      jax: ["input/TeX", "output/HTML-CSS"],
      tex2jax: {
        inlineMath: [ ['$','$'], ["\\(","\\)"] ],
        displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
        processEscapes: true,
        ignoreClass: '.*',
        processClass: 'mathjax.*'
      },
      TeX: {
        extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"],
        noErrors: {
          inlineDelimiters: ["$","$"],
          multiLine: false,
          style: { "font-size": "normal", "border": "" }
        }
      },
      "HTML-CSS": { availableFonts: ["TeX"] }
    });
  </script>
  <!-- Explicit https: a protocol-relative URL would inherit the scheme of whatever context the page is opened in. -->
  <script src="https://static.arxiv.org/MathJax-2.7.3/MathJax.js"></script>
  <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script>
  <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" />
  <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" />
  <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script>
  <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script>
  <!-- NOTE(review): "radio" is not an HTML element name, so this type selector presumably matches nothing; confirm the intended target before changing it. -->
  <style> radio#cf-customfield_11400 { display: none; } </style>
</head>
<body>
<header>
  <a href="#main-container" class="is-sr-only">Skip to main content</a>
  <!-- contains Cornell logo and sponsor statement -->
  <div class="attribution level is-marginless" role="banner">
    <div class="level-left">
      <!-- The image's alt text is the accessible name of this link, so it carries no aria-label override. -->
      <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" /></a>
    </div>
    <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors.
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div>
  </div>
  <!-- contains arXiv identity and search bar -->
  <div class="identity level is-marginless">
    <div class="level-left">
      <div class="level-item">
        <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo">
          <!-- No aria-label on the image: it would replace the alt text as the image's accessible name. -->
          <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" alt="arxiv logo" width="85" style="width:85px;"/>
        </a>
      </div>
    </div>
    <div class="search-block level-right">
      <!-- Compact header search; the hidden "source" field submits the value "header" with the query. -->
      <form class="level-item mini-search" method="GET" action="https://arxiv.org/search">
        <div class="field has-addons">
          <div class="control">
            <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" />
            <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p>
          </div>
          <div class="control">
            <div class="select is-small">
              <select name="searchtype" aria-label="Field to search">
                <option value="all" selected="selected">All fields</option>
                <option value="title">Title</option>
                <option value="author">Author</option>
                <option value="abstract">Abstract</option>
                <option value="comments">Comments</option>
                <option value="journal_ref">Journal reference</option>
                <option value="acm_class">ACM classification</option>
                <option value="msc_class">MSC classification</option>
                <option value="report_num">Report number</option>
                <option value="paper_id">arXiv identifier</option>
                <option value="doi">DOI</option>
                <option value="orcid">ORCID</option>
                <option value="author_id">arXiv author ID</option>
                <option value="help">Help pages</option>
                <option value="full_text">Full text</option>
              </select>
            </div>
          </div>
          <input type="hidden" name="source" value="header">
          <button class="button is-small is-cul-darker">Search</button>
        </div>
      </form>
    </div>
  </div> <!-- closes identity -->
  <div class="container">
    <div class="user-tools is-size-7 has-text-right has-text-weight-bold"
         role="navigation" aria-label="User menu">
      <a href="https://arxiv.org/login">Login</a>
    </div>
  </div>
</header>
<main class="container" id="main-container">
  <div class="level is-marginless">
    <div class="level-left">
      <h1 class="title is-clearfix"> Showing 1&ndash;37 of 37 results for author: <span class="mathjax">Clark, E</span> </h1>
    </div>
    <div class="level-right is-hidden-mobile">
      <!-- feedback for mobile is moved to footer -->
      <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span>
    </div>
  </div>
  <div class="content">
    <!-- Main search form. The search landmark is declared with the "role" attribute ("aria-role" is not a recognised attribute). -->
    <form method="GET" action="/search/cs" role="search">
      Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Clark%2C+E">Search in all archives.</a>
      <div class="field has-addons-tablet">
        <div class="control is-expanded">
          <label for="query" class="hidden-label">Search term or terms</label>
          <input class="input is-medium" id="query" name="query" placeholder="Search term..."
type="text" value="Clark, E">
        </div>
        <div class="select control is-medium">
          <label class="is-hidden" for="searchtype">Field</label>
          <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select>
        </div>
        <div class="control">
          <button class="button is-link is-medium">Search</button>
        </div>
      </div>
      <div class="field">
        <div class="control is-size-7">
          <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label>
          <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label>
        </div>
      </div>
      <div class="is-clearfix" style="height: 2.5em">
        <div class="is-pulled-right">
          <a href="/search/advanced?terms-0-term=Clark%2C+E&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a>
        </div>
      </div>
      <input type="hidden" name="order" value="-announced_date_first">
      <input type="hidden" name="size" value="50">
    </form>
    <div class="level breathe-horizontal">
      <div class="level-left">
        <!-- Page-size / sort-order form. It resubmits the current query through the hidden controls below. -->
        <form method="GET" action="/search/">
          <!--
            Hidden copies of the main form's controls. Their ids carry a "sort-" prefix so that
            every id in the document is unique; the name attributes are the same as in the main
            form, so the submitted query string is unaffected.
          -->
          <div style="display: none;">
            <select id="sort-searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select>
            <input id="sort-query" name="query" type="text" value="Clark, E">
            <ul id="abstracts"><li><input checked id="sort-abstracts-0" name="abstracts" type="radio" value="show"> <label for="sort-abstracts-0">Show abstracts</label></li><li><input id="sort-abstracts-1" name="abstracts" type="radio" value="hide"> <label for="sort-abstracts-1">Hide abstracts</label></li></ul>
          </div>
          <div class="box field is-grouped is-grouped-multiline level-item">
            <div class="control">
              <span class="select is-small">
                <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select>
              </span>
              <label for="size">results per page</label>.
</div>
<div class="control">
  <label for="order">Sort results by</label>
  <span class="select is-small">
    <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select>
  </span>
</div>
<div class="control">
  <button class="button is-small is-link">Go</button>
</div>
</div>
</form>
</div>
</div>
<!--
  Result list. Each entry holds a truncated and a full copy of the abstract; the "More" and
  "Less" controls swap which copy is displayed. Those controls have no href, so role="button",
  tabindex="0" and an Enter/Space key handler (which forwards to the click handler) make them
  reachable and operable from the keyboard.
-->
<ol class="breathe-horizontal" start="1">
  <li class="arxiv-result">
    <div class="is-marginless">
      <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.20763">arXiv:2410.20763</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.20763">pdf</a>, <a href="https://arxiv.org/format/2410.20763">other</a>]&nbsp;</span> </p>
      <div class="tags is-inline-block">
        <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span>
      </div>
    </div>
    <p class="title is-5 mathjax"> Evaluating LLMs for Targeted Concept Simplification for Domain-Specific Texts </p>
    <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Asthana%2C+S">Sumit Asthana</a>, <a href="/search/cs?searchtype=author&amp;query=Rashkin%2C+H">Hannah Rashkin</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E">Elizabeth Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Huot%2C+F">Fantine Huot</a>, <a href="/search/cs?searchtype=author&amp;query=Lapata%2C+M">Mirella Lapata</a> </p>
    <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
      <span class="abstract-short has-text-grey-dark mathjax" id="2410.20763v2-abstract-short" style="display: inline;"> One useful application of NLP models is to support people in reading complex text from unfamiliar domains (e.g., scientific articles). Simplifying the entire text makes it understandable but sometimes removes important details. On the contrary, helping adult readers understand difficult concepts in context can enhance their vocabulary and knowledge. In a preliminary human study, we first identify&hellip; <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2410.20763v2-abstract-full').style.display = 'inline'; document.getElementById('2410.20763v2-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9661; More</a> </span>
      <span class="abstract-full has-text-grey-dark mathjax" id="2410.20763v2-abstract-full" style="display: none;"> One useful application of NLP models is to support people in reading complex text from unfamiliar domains (e.g., scientific articles). Simplifying the entire text makes it understandable but sometimes removes important details. On the contrary, helping adult readers understand difficult concepts in context can enhance their vocabulary and knowledge. In a preliminary human study, we first identify that lack of context and unfamiliarity with difficult concepts is a major reason for adult readers&#39; difficulty with domain-specific text. We then introduce &#34;targeted concept simplification,&#34; a simplification task for rewriting text to help readers comprehend text containing unfamiliar concepts. We also introduce WikiDomains, a new dataset of 22k definitions from 13 academic domains paired with a difficult concept within each definition. We benchmark the performance of open-source and commercial LLMs and a simple dictionary baseline on this task across human judgments of ease of understanding and meaning preservation. Interestingly, our human judges preferred explanations about the difficult concept more than simplification of the concept phrase. Further, no single model achieved superior performance across all quality dimensions, and automated metrics also show low correlations with human evaluations of concept simplification ($\sim0.2$), opening up rich avenues for research on personalized human reading comprehension support. <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2410.20763v2-abstract-full').style.display = 'none'; document.getElementById('2410.20763v2-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9651; Less</a> </span>
    </p>
    <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p>
    <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">to appear in proceedings of EMNLP 2024</span> </p>
  </li>
  <li class="arxiv-result">
    <div class="is-marginless">
      <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02603">arXiv:2410.02603</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.02603">pdf</a>, <a href="https://arxiv.org/format/2410.02603">other</a>]&nbsp;</span> </p>
      <div class="tags is-inline-block">
        <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span>
        <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span>
        <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span>
      </div>
    </div>
    <p class="title is-5 mathjax"> Agents&#39; Room: Narrative Generation through Multi-step Collaboration </p>
    <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Huot%2C+F">Fantine Huot</a>, <a href="/search/cs?searchtype=author&amp;query=Amplayo%2C+R+K">Reinald Kim Amplayo</a>, <a href="/search/cs?searchtype=author&amp;query=Palomaki%2C+J">Jennimaria Palomaki</a>, <a href="/search/cs?searchtype=author&amp;query=Jakobovits%2C+A+S">Alice Shoshana Jakobovits</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E">Elizabeth Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Lapata%2C+M">Mirella Lapata</a> </p>
    <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
      <span class="abstract-short has-text-grey-dark mathjax" id="2410.02603v1-abstract-short" style="display: inline;"> Writing compelling fiction is a multifaceted process combining elements such as crafting a plot, developing interesting characters, and using evocative language. While large language models (LLMs) show promise for story writing, they currently rely heavily on intricate prompting, which limits their use. We propose Agents&#39; Room, a generation framework inspired by narrative theory, that decomposes n&hellip; <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2410.02603v1-abstract-full').style.display = 'inline'; document.getElementById('2410.02603v1-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9661; More</a> </span>
      <span class="abstract-full has-text-grey-dark mathjax" id="2410.02603v1-abstract-full" style="display: none;"> Writing compelling fiction is a multifaceted process combining elements such as crafting a plot, developing interesting characters, and using evocative language. While large language models (LLMs) show promise for story writing, they currently rely heavily on intricate prompting, which limits their use. We propose Agents&#39; Room, a generation framework inspired by narrative theory, that decomposes narrative writing into subtasks tackled by specialized agents. To illustrate our method, we introduce Tell Me A Story, a high-quality dataset of complex writing prompts and human-written stories, and a novel evaluation framework designed specifically for assessing long narratives. We show that Agents&#39; Room generates stories that are preferred by expert evaluators over those produced by baseline systems by leveraging collaboration and specialization to decompose the complex story writing task into tractable components. We provide extensive analysis with automated and human-based metrics of the generated output. <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2410.02603v1-abstract-full').style.display = 'none'; document.getElementById('2410.02603v1-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9651; Less</a> </span>
    </p>
    <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p>
    <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Under review as a conference paper at ICLR 2025</span> </p>
  </li>
  <li class="arxiv-result">
    <div class="is-marginless">
      <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.06724">arXiv:2406.06724</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.06724">pdf</a>, <a href="https://arxiv.org/format/2406.06724">other</a>]&nbsp;</span> </p>
      <div class="tags is-inline-block">
        <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span>
      </div>
      <div class="is-inline-block" style="margin-left: 0.5rem">
        <div class="tags has-addons">
          <span class="tag is-dark is-size-7">doi</span>
          <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/IROS51168.2021.9635987">10.1109/IROS51168.2021.9635987 <i class="fa fa-external-link" aria-hidden="true"></i></a></span>
        </div>
      </div>
    </div>
    <p class="title is-5 mathjax"> Stochastic Guidance of Buoyancy Controlled Vehicles under Ice Shelves using Ocean Currents </p>
    <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Rossi%2C+F">Federico Rossi</a>, <a href="/search/cs?searchtype=author&amp;query=Branch%2C+A">Andrew Branch</a>, <a href="/search/cs?searchtype=author&amp;query=Schodlok%2C+M+P">Michael P. Schodlok</a>, <a href="/search/cs?searchtype=author&amp;query=Stanton%2C+T">Timothy Stanton</a>, <a href="/search/cs?searchtype=author&amp;query=Fenty%2C+I+G">Ian G. Fenty</a>, <a href="/search/cs?searchtype=author&amp;query=Hook%2C+J+V">Joshua Vander Hook</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E+B">Evan B.
Clark</a> </p>
    <!--
      The "More" and "Less" controls below swap the truncated and full abstract. They have no
      href, so role="button", tabindex="0" and an Enter/Space key handler (which forwards to the
      click handler) make them reachable and operable from the keyboard.
    -->
    <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
      <span class="abstract-short has-text-grey-dark mathjax" id="2406.06724v1-abstract-short" style="display: inline;"> We propose a novel technique for guidance of buoyancy-controlled vehicles in uncertain under-ice ocean flows. In-situ melt rate measurements collected at the grounding zone of Antarctic ice shelves, where the ice shelf meets the underlying bedrock, are essential to constrain models of future sea level rise. Buoyancy-controlled vehicles, which control their vertical position in the water column thr&hellip; <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2406.06724v1-abstract-full').style.display = 'inline'; document.getElementById('2406.06724v1-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9661; More</a> </span>
      <span class="abstract-full has-text-grey-dark mathjax" id="2406.06724v1-abstract-full" style="display: none;"> We propose a novel technique for guidance of buoyancy-controlled vehicles in uncertain under-ice ocean flows. In-situ melt rate measurements collected at the grounding zone of Antarctic ice shelves, where the ice shelf meets the underlying bedrock, are essential to constrain models of future sea level rise. Buoyancy-controlled vehicles, which control their vertical position in the water column through internal actuation but have no means of horizontal propulsion, offer an affordable and reliable platform for such in-situ data collection. However, reaching the grounding zone requires vehicles to traverse tens of kilometers under the ice shelf, with approximate position knowledge and no means of communication, in highly variable and uncertain ocean currents. To address this challenge, we propose a partially observable MDP approach that exploits model-based knowledge of the under-ice currents and, critically, of their uncertainty, to synthesize effective guidance policies. The approach uses approximate dynamic programming to model uncertainty in the currents, and QMDP to address localization uncertainty. Numerical experiments show that the policy can deliver up to 88.8% of underwater vehicles to the grounding zone -- a 33% improvement compared to state-of-the-art guidance techniques, and a 262% improvement over uncontrolled drifters. Collectively, these results show that model-based under-ice guidance is a highly promising technique for exploration of under-ice cavities, and has the potential to enable cost-effective and scalable access to these challenging and rarely observed environments. <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2406.06724v1-abstract-full').style.display = 'none'; document.getElementById('2406.06724v1-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9651; Less</a> </span>
    </p>
    <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p>
    <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Presented at IROS 2021</span> </p>
  </li>
  <li class="arxiv-result">
    <div class="is-marginless">
      <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.14199">arXiv:2405.14199</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.14199">pdf</a>, <a href="https://arxiv.org/format/2405.14199">other</a>]&nbsp;</span> </p>
      <div class="tags is-inline-block">
        <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span>
        <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span>
      </div>
    </div>
    <p class="title is-5 mathjax"> Adaptive Teaching in Heterogeneous Agents: Balancing Surprise in Sparse Reward Scenarios </p>
    <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E">Emma Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Ryu%2C+K">Kanghyun Ryu</a>, <a href="/search/cs?searchtype=author&amp;query=Mehr%2C+N">Negar Mehr</a> </p>
    <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
      <span class="abstract-short has-text-grey-dark mathjax" id="2405.14199v1-abstract-short" style="display: inline;"> Learning from Demonstration (LfD) can be an efficient way to train systems with analogous agents by enabling ``Student&#39;&#39; agents to learn from the demonstrations of the most experienced ``Teacher&#39;&#39; agent, instead of training their policy in parallel.
However, when there are discrepancies in agent capabilities, such as divergent actuator power or joint angle constraints, naively replicating demonstr&hellip;
        <!--
          The "More" and "Less" controls in these entries swap the truncated and full abstract.
          They have no href, so role="button", tabindex="0" and an Enter/Space key handler (which
          forwards to the click handler) make them reachable and operable from the keyboard.
        -->
        <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2405.14199v1-abstract-full').style.display = 'inline'; document.getElementById('2405.14199v1-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9661; More</a> </span>
      <span class="abstract-full has-text-grey-dark mathjax" id="2405.14199v1-abstract-full" style="display: none;"> Learning from Demonstration (LfD) can be an efficient way to train systems with analogous agents by enabling ``Student&#39;&#39; agents to learn from the demonstrations of the most experienced ``Teacher&#39;&#39; agent, instead of training their policy in parallel. However, when there are discrepancies in agent capabilities, such as divergent actuator power or joint angle constraints, naively replicating demonstrations that are out of bounds for the Student&#39;s capability can limit efficient learning. We present a Teacher-Student learning framework specifically tailored to address the challenge of heterogeneity between the Teacher and Student agents. Our framework is based on the concept of ``surprise&#39;&#39;, inspired by its application in exploration incentivization in sparse-reward environments. Surprise is repurposed to enable the Teacher to detect and adapt to differences between itself and the Student. By focusing on maximizing its surprise in response to the environment while concurrently minimizing the Student&#39;s surprise in response to the demonstrations, the Teacher agent can effectively tailor its demonstrations to the Student&#39;s specific capabilities and constraints. We validate our method by demonstrating improvements in the Student&#39;s learning in control tasks within sparse-reward environments. <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2405.14199v1-abstract-full').style.display = 'none'; document.getElementById('2405.14199v1-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9651; Less</a> </span>
    </p>
    <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p>
    <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To be published in L4DC 2024, 10 pages, 5 figures</span> </p>
  </li>
  <li class="arxiv-result">
    <div class="is-marginless">
      <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.03906">arXiv:2403.03906</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.03906">pdf</a>, <a href="https://arxiv.org/format/2403.03906">other</a>]&nbsp;</span> </p>
      <div class="tags is-inline-block">
        <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span>
      </div>
    </div>
    <p class="title is-5 mathjax"> On HTLC-Based Protocols for Multi-Party Cross-Chain Swaps </p>
    <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E">Emily Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Georgiou%2C+C">Chloe Georgiou</a>, <a href="/search/cs?searchtype=author&amp;query=Poon%2C+K">Katelyn Poon</a>, <a href="/search/cs?searchtype=author&amp;query=Chrobak%2C+M">Marek Chrobak</a> </p>
    <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
      <span class="abstract-short has-text-grey-dark mathjax" id="2403.03906v2-abstract-short" style="display: inline;"> In his 2018 paper, Herlihy introduced an atomic protocol for multi-party asset swaps across different blockchains. His model represents an asset swap by a directed graph whose nodes are the participating parties and edges represent asset transfers, and rational behavior of the participants is captured by a preference relation between a protocol&#39;s outcomes. Asset transfers between parties are achie&hellip; <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2403.03906v2-abstract-full').style.display = 'inline'; document.getElementById('2403.03906v2-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9661; More</a> </span>
      <span class="abstract-full has-text-grey-dark mathjax" id="2403.03906v2-abstract-full" style="display: none;"> In his 2018 paper, Herlihy introduced an atomic protocol for multi-party asset swaps across different blockchains. His model represents an asset swap by a directed graph whose nodes are the participating parties and edges represent asset transfers, and rational behavior of the participants is captured by a preference relation between a protocol&#39;s outcomes. Asset transfers between parties are achieved using smart contracts. These smart contracts are quite involved and they require storage and processing of a large number of paths in the swap digraph, limiting practical significance of his protocol. His paper also describes a different protocol that uses only standard hash time-lock contracts (HTLC&#39;s), but this simpler protocol applies only to some special types of digraphs. He left open the question whether there is a simple and efficient protocol for cross-chain asset swaps in arbitrary digraphs. Motivated by this open problem, we conducted a comprehensive study of \emph{HTLC-based protocols}, in which all asset transfers are implemented with HTLCs. Our main contribution is a full characterization of swap digraphs that have such protocols. <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2403.03906v2-abstract-full').style.display = 'none'; document.getElementById('2403.03906v2-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9651; Less</a> </span>
    </p>
    <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p>
    <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> F.2 </p>
  </li>
  <li class="arxiv-result">
    <div class="is-marginless">
      <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.03506">arXiv:2401.03506</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.03506">pdf</a>, <a href="https://arxiv.org/format/2401.03506">other</a>]&nbsp;</span> </p>
      <div class="tags is-inline-block">
        <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span>
        <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span>
        <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span>
      </div>
    </div>
    <p class="title is-5 mathjax"> DiarizationLM: Speaker Diarization Post-Processing with Large Language Models </p>
    <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Q">Quan Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+Y">Yiling Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+G">Guanlong Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E">Evan Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Xia%2C+W">Wei Xia</a>, <a href="/search/cs?searchtype=author&amp;query=Liao%2C+H">Hank Liao</a> </p>
    <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
      <span class="abstract-short has-text-grey-dark mathjax" id="2401.03506v9-abstract-short" style="display: inline;"> In this paper, we introduce DiarizationLM, a framework to leverage large language models (LLM) to post-process the outputs from a speaker diarization system. Various goals can be achieved with the proposed framework, such as improving the readability of the diarized transcript, or reducing the word diarization error rate (WDER). In this framework, the outputs of the automatic speech recognition (A&hellip; <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2401.03506v9-abstract-full').style.display = 'inline'; document.getElementById('2401.03506v9-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9661; More</a> </span>
      <span class="abstract-full has-text-grey-dark mathjax" id="2401.03506v9-abstract-full" style="display: none;"> In this paper, we introduce DiarizationLM, a framework to leverage large language models (LLM) to post-process the outputs from a speaker diarization system. Various goals can be achieved with the proposed framework, such as improving the readability of the diarized transcript, or reducing the word diarization error rate (WDER). In this framework, the outputs of the automatic speech recognition (ASR) and speaker diarization systems are represented as a compact textual format, which is included in the prompt to an optionally finetuned LLM. The outputs of the LLM can be used as the refined diarization results with the desired enhancement. As a post-processing step, this framework can be easily applied to any off-the-shelf ASR and speaker diarization systems without retraining existing components. Our experiments show that a finetuned PaLM 2-S model can reduce the WDER by rel. 55.5% on the Fisher telephone conversation dataset, and rel. 44.9% on the Callhome English dataset. <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" onclick="document.getElementById('2401.03506v9-abstract-full').style.display = 'none'; document.getElementById('2401.03506v9-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9651; Less</a> </span>
    </p>
    <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p>
  </li>
  <li class="arxiv-result">
    <div class="is-marginless">
      <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.09424">arXiv:2311.09424</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.09424">pdf</a>, <a href="https://arxiv.org/format/2311.09424">other</a>]&nbsp;</span> </p>
      <div class="tags is-inline-block">
        <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span>
      </div>
    </div>
    <p class="title is-5 mathjax"> Predicting Spine Geometry and Scoliosis from DXA Scans </p>
    <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jamaludin%2C+A">Amir Jamaludin</a>, <a href="/search/cs?searchtype=author&amp;query=Kadir%2C+T">Timor Kadir</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E">Emma Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Zisserman%2C+A">Andrew Zisserman</a> </p>
    <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
      <span class="abstract-short has-text-grey-dark mathjax" id="2311.09424v1-abstract-short" style="display: inline;"> Our objective in this paper is to estimate spine curvature in DXA scans. To this end we first train a neural network to predict the middle spine curve in the scan, and then use an integral-based method to determine the curvature along the spine curve.
We use the curvature to compare to the standard angle scoliosis measure obtained using the DXA Scoliosis Method (DSM). The performance improves over&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.09424v1-abstract-full').style.display = 'inline'; document.getElementById('2311.09424v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.09424v1-abstract-full" style="display: none;"> Our objective in this paper is to estimate spine curvature in DXA scans. To this end we first train a neural network to predict the middle spine curve in the scan, and then use an integral-based method to determine the curvature along the spine curve. We use the curvature to compare to the standard angle scoliosis measure obtained using the DXA Scoliosis Method (DSM). The performance improves over the prior work of Jamaludin et al. 2018. We show that the maximum curvature can be used as a scoring function for ordering the severity of spinal deformation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.09424v1-abstract-full').style.display = 'none'; document.getElementById('2311.09424v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">CSI@MICCAI 2019 Submission</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.16600">arXiv:2305.16600</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.16600">pdf</a>, <a href="https://arxiv.org/format/2305.16600">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Temporal Evolution of Risk Behavior in a Disease Spread Simulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Langle-Chimal%2C+O+D">Ollin D. Langle-Chimal</a>, <a href="/search/cs?searchtype=author&amp;query=Merrill%2C+S+C">Scott C. Merrill</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E+M">Eric M. Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Bucini%2C+G">Gabriela Bucini</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+T">Tung-Lin Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Shrum%2C+T+R">Trisha R. Shrum</a>, <a href="/search/cs?searchtype=author&amp;query=Koliba%2C+C">Christopher Koliba</a>, <a href="/search/cs?searchtype=author&amp;query=Zia%2C+A">Asim Zia</a>, <a href="/search/cs?searchtype=author&amp;query=Smith%2C+J+M">Julia M. Smith</a>, <a href="/search/cs?searchtype=author&amp;query=Cheney%2C+N">Nicholas Cheney</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.16600v2-abstract-short" style="display: inline;"> Human behavior is a dynamic process that evolves with experience. 
Understanding the evolution of individual&#39;s risk propensity is critical to design public health interventions to propitiate the adoption of better biosecurity protocols and thus, prevent the transmission of an infectious disease. Using an experimental game that simulates the spread of a disease in a network of porcine farms, we meas&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.16600v2-abstract-full').style.display = 'inline'; document.getElementById('2305.16600v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.16600v2-abstract-full" style="display: none;"> Human behavior is a dynamic process that evolves with experience. Understanding the evolution of individual&#39;s risk propensity is critical to design public health interventions to propitiate the adoption of better biosecurity protocols and thus, prevent the transmission of an infectious disease. Using an experimental game that simulates the spread of a disease in a network of porcine farms, we measure how learning from experience affects the risk aversion of over $1000$ players. We used a fully automated approach to segment the players into 4 categories based on the temporal trends of their game plays and compare the outcomes of their overall game performance. We found that the risk tolerant group is $50\%$ more likely to incur an infection than the risk averse one. We also find that while all individuals decrease the amount of time it takes to make decisions as they become more experienced at the game, we find a group of players with constant decision strategies who rapidly decrease their time to make a decision and a second context-aware decision group that contemplates longer before decisions while presumably performing a real-time risk assessment. 
The behavioral strategies employed by players in this simulated setting could be used in the future as an early warning signal to identify undesirable biosecurity-related risk aversion preferences, or changes in behavior, which may allow for targeted interventions to help mitigate them. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.16600v2-abstract-full').style.display = 'none'; document.getElementById('2305.16600v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 1 table, 7 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> ACM-class: F.2.2; I.2.7 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.14755">arXiv:2305.14755</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.14755">pdf</a>, <a href="https://arxiv.org/format/2305.14755">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Don&#39;t Take This Out of Context! 
On the Need for Contextual Models and Evaluations for Stylistic Rewriting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yerukola%2C+A">Akhila Yerukola</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xuhui Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E">Elizabeth Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Sap%2C+M">Maarten Sap</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.14755v2-abstract-short" style="display: inline;"> Most existing stylistic text rewriting methods and evaluation metrics operate on a sentence level, but ignoring the broader context of the text can lead to preferring generic, ambiguous, and incoherent rewrites. In this paper, we investigate integrating the preceding textual context into both the $\textit{rewriting}$ and $\textit{evaluation}$ stages of stylistic text rewriting, and introduce a new&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.14755v2-abstract-full').style.display = 'inline'; document.getElementById('2305.14755v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.14755v2-abstract-full" style="display: none;"> Most existing stylistic text rewriting methods and evaluation metrics operate on a sentence level, but ignoring the broader context of the text can lead to preferring generic, ambiguous, and incoherent rewrites. In this paper, we investigate integrating the preceding textual context into both the $\textit{rewriting}$ and $\textit{evaluation}$ stages of stylistic text rewriting, and introduce a new composite contextual evaluation metric $\texttt{CtxSimFit}$ that combines similarity to the original sentence with contextual cohesiveness. 
We comparatively evaluate non-contextual and contextual rewrites in formality, toxicity, and sentiment transfer tasks. Our experiments show that humans significantly prefer contextual rewrites as more fitting and natural over non-contextual ones, yet existing sentence-level automatic metrics (e.g., ROUGE, SBERT) correlate poorly with human preferences ($ρ$=0--0.3). In contrast, human preferences are much better reflected by both our novel $\texttt{CtxSimFit}$ ($ρ$=0.7--0.9) as well as proposed context-infused versions of common metrics ($ρ$=0.4--0.7). Overall, our findings highlight the importance of integrating context into the generation and especially the evaluation stages of stylistic text rewriting. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.14755v2-abstract-full').style.display = 'none'; document.getElementById('2305.14755v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">emnlp 2023 main camera ready</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.13194">arXiv:2305.13194</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.13194">pdf</a>, <a href="https://arxiv.org/format/2305.13194">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> SEAHORSE: A Multilingual, Multifaceted Dataset for Summarization Evaluation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E">Elizabeth Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Rijhwani%2C+S">Shruti Rijhwani</a>, <a href="/search/cs?searchtype=author&amp;query=Gehrmann%2C+S">Sebastian Gehrmann</a>, <a href="/search/cs?searchtype=author&amp;query=Maynez%2C+J">Joshua Maynez</a>, <a href="/search/cs?searchtype=author&amp;query=Aharoni%2C+R">Roee Aharoni</a>, <a href="/search/cs?searchtype=author&amp;query=Nikolaev%2C+V">Vitaly Nikolaev</a>, <a href="/search/cs?searchtype=author&amp;query=Sellam%2C+T">Thibault Sellam</a>, <a href="/search/cs?searchtype=author&amp;query=Siddhant%2C+A">Aditya Siddhant</a>, <a href="/search/cs?searchtype=author&amp;query=Das%2C+D">Dipanjan Das</a>, <a href="/search/cs?searchtype=author&amp;query=Parikh%2C+A+P">Ankur P. 
Parikh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.13194v2-abstract-short" style="display: inline;"> Reliable automatic evaluation of summarization systems is challenging due to the multifaceted and subjective nature of the task. This is especially the case for languages other than English, where human evaluations are scarce. In this work, we introduce SEAHORSE, a dataset for multilingual, multifaceted summarization evaluation. SEAHORSE consists of 96K summaries with human ratings along 6 dimensi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.13194v2-abstract-full').style.display = 'inline'; document.getElementById('2305.13194v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.13194v2-abstract-full" style="display: none;"> Reliable automatic evaluation of summarization systems is challenging due to the multifaceted and subjective nature of the task. This is especially the case for languages other than English, where human evaluations are scarce. In this work, we introduce SEAHORSE, a dataset for multilingual, multifaceted summarization evaluation. SEAHORSE consists of 96K summaries with human ratings along 6 dimensions of text quality: comprehensibility, repetition, grammar, attribution, main ideas, and conciseness, covering 6 languages, 9 systems and 4 datasets. As a result of its size and scope, SEAHORSE can serve both as a benchmark to evaluate learnt metrics, as well as a large-scale resource for training such metrics. We show that metrics trained with SEAHORSE achieve strong performance on the out-of-domain meta-evaluation benchmarks TRUE (Honovich et al., 2022) and mFACE (Aharoni et al., 2022). 
We make the SEAHORSE dataset and metrics publicly available for future research on multilingual and multifaceted summarization evaluation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.13194v2-abstract-full').style.display = 'none'; document.getElementById('2305.13194v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.01633">arXiv:2305.01633</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.01633">pdf</a>, <a href="https://arxiv.org/format/2305.01633">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Missing Information, Unresponsive Authors, Experimental Flaws: The Impossibility of Assessing the Reproducibility of Previous Human Evaluations in NLP </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Belz%2C+A">Anya Belz</a>, <a href="/search/cs?searchtype=author&amp;query=Thomson%2C+C">Craig Thomson</a>, <a href="/search/cs?searchtype=author&amp;query=Reiter%2C+E">Ehud Reiter</a>, <a href="/search/cs?searchtype=author&amp;query=Abercrombie%2C+G">Gavin Abercrombie</a>, <a href="/search/cs?searchtype=author&amp;query=Alonso-Moral%2C+J+M">Jose M. 
Alonso-Moral</a>, <a href="/search/cs?searchtype=author&amp;query=Arvan%2C+M">Mohammad Arvan</a>, <a href="/search/cs?searchtype=author&amp;query=Braggaar%2C+A">Anouck Braggaar</a>, <a href="/search/cs?searchtype=author&amp;query=Cieliebak%2C+M">Mark Cieliebak</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E">Elizabeth Clark</a>, <a href="/search/cs?searchtype=author&amp;query=van+Deemter%2C+K">Kees van Deemter</a>, <a href="/search/cs?searchtype=author&amp;query=Dinkar%2C+T">Tanvi Dinkar</a>, <a href="/search/cs?searchtype=author&amp;query=Du%C5%A1ek%2C+O">Ondřej Dušek</a>, <a href="/search/cs?searchtype=author&amp;query=Eger%2C+S">Steffen Eger</a>, <a href="/search/cs?searchtype=author&amp;query=Fang%2C+Q">Qixiang Fang</a>, <a href="/search/cs?searchtype=author&amp;query=Gao%2C+M">Mingqi Gao</a>, <a href="/search/cs?searchtype=author&amp;query=Gatt%2C+A">Albert Gatt</a>, <a href="/search/cs?searchtype=author&amp;query=Gkatzia%2C+D">Dimitra Gkatzia</a>, <a href="/search/cs?searchtype=author&amp;query=Gonz%C3%A1lez-Corbelle%2C+J">Javier González-Corbelle</a>, <a href="/search/cs?searchtype=author&amp;query=Hovy%2C+D">Dirk Hovy</a>, <a href="/search/cs?searchtype=author&amp;query=H%C3%BCrlimann%2C+M">Manuela Hürlimann</a>, <a href="/search/cs?searchtype=author&amp;query=Ito%2C+T">Takumi Ito</a>, <a href="/search/cs?searchtype=author&amp;query=Kelleher%2C+J+D">John D. Kelleher</a>, <a href="/search/cs?searchtype=author&amp;query=Klubicka%2C+F">Filip Klubicka</a>, <a href="/search/cs?searchtype=author&amp;query=Krahmer%2C+E">Emiel Krahmer</a>, <a href="/search/cs?searchtype=author&amp;query=Lai%2C+H">Huiyuan Lai</a> , et al. 
(17 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.01633v2-abstract-short" style="display: inline;"> We report our efforts in identifying a set of previous human evaluations in NLP that would be suitable for a coordinated study examining what makes human evaluations in NLP more/less reproducible. We present our results and findings, which include that just 13\% of papers had (i) sufficiently low barriers to reproduction, and (ii) enough obtainable information, to be considered for reproduction, a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.01633v2-abstract-full').style.display = 'inline'; document.getElementById('2305.01633v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.01633v2-abstract-full" style="display: none;"> We report our efforts in identifying a set of previous human evaluations in NLP that would be suitable for a coordinated study examining what makes human evaluations in NLP more/less reproducible. We present our results and findings, which include that just 13\% of papers had (i) sufficiently low barriers to reproduction, and (ii) enough obtainable information, to be considered for reproduction, and that all but one of the experiments we selected for reproduction was discovered to have flaws that made the meaningfulness of conducting a reproduction questionable. As a result, we had to change our coordinated study design from a reproduce approach to a standardise-then-reproduce-twice approach. 
Our overall (negative) finding that the great majority of human evaluations in NLP is not repeatable and/or not reproducible and/or too flawed to justify reproduction, paints a dire picture, but presents an opportunity for a rethink about how to design and report human evaluations in NLP. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.01633v2-abstract-full').style.display = 'none'; document.getElementById('2305.01633v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages plus appendix, 4 tables, 1 figure. To appear at &#34;Workshop on Insights from Negative Results in NLP&#34; (co-located with EACL2023). 
Updated author list and acknowledgements</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.7 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.10622">arXiv:2212.10622</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2212.10622">pdf</a>, <a href="https://arxiv.org/format/2212.10622">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> mFACE: Multilingual Summarization with Factual Consistency Evaluation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Aharoni%2C+R">Roee Aharoni</a>, <a href="/search/cs?searchtype=author&amp;query=Narayan%2C+S">Shashi Narayan</a>, <a href="/search/cs?searchtype=author&amp;query=Maynez%2C+J">Joshua Maynez</a>, <a href="/search/cs?searchtype=author&amp;query=Herzig%2C+J">Jonathan Herzig</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E">Elizabeth Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Lapata%2C+M">Mirella Lapata</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.10622v2-abstract-short" style="display: inline;"> Abstractive summarization has enjoyed renewed interest in recent years, thanks to pre-trained language models and the availability of large-scale datasets. Despite promising results, current models still suffer from generating factually inconsistent summaries, reducing their utility for real-world application. 
Several recent efforts attempt to address this by devising models that automatically det&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.10622v2-abstract-full').style.display = 'inline'; document.getElementById('2212.10622v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.10622v2-abstract-full" style="display: none;"> Abstractive summarization has enjoyed renewed interest in recent years, thanks to pre-trained language models and the availability of large-scale datasets. Despite promising results, current models still suffer from generating factually inconsistent summaries, reducing their utility for real-world application. Several recent efforts attempt to address this by devising models that automatically detect factual inconsistencies in machine generated summaries. However, they focus exclusively on English, a language with abundant resources. In this work, we leverage factual consistency evaluation models to improve multilingual summarization. We explore two intuitive approaches to mitigate hallucinations based on the signal provided by a multilingual NLI model, namely data filtering and controlled generation. Experimental results in the 45 languages from the XLSum dataset show gains over strong baselines in both automatic and human evaluation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.10622v2-abstract-full').style.display = 'none'; document.getElementById('2212.10622v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">28 pages with links to released data</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.10397">arXiv:2212.10397</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2212.10397">pdf</a>, <a href="https://arxiv.org/format/2212.10397">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Needle in a Haystack: An Analysis of High-Agreement Workers on MTurk for Summarization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+L">Lining Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Mille%2C+S">Simon Mille</a>, <a href="/search/cs?searchtype=author&amp;query=Hou%2C+Y">Yufang Hou</a>, <a href="/search/cs?searchtype=author&amp;query=Deutsch%2C+D">Daniel Deutsch</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E">Elizabeth Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Yixin Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Mahamood%2C+S">Saad Mahamood</a>, <a href="/search/cs?searchtype=author&amp;query=Gehrmann%2C+S">Sebastian Gehrmann</a>, <a href="/search/cs?searchtype=author&amp;query=Clinciu%2C+M">Miruna Clinciu</a>, <a href="/search/cs?searchtype=author&amp;query=Chandu%2C+K">Khyathi Chandu</a>, <a href="/search/cs?searchtype=author&amp;query=Sedoc%2C+J">João Sedoc</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.10397v3-abstract-short" style="display: inline;"> To prevent the costly and inefficient use of resources 
on low-quality annotations, we want a method for creating a pool of dependable annotators who can effectively complete difficult tasks, such as evaluating automatic summarization. Thus, we investigate the recruitment of high-quality Amazon Mechanical Turk workers via a two-step pipeline. We show that we can successfully filter out subpar worke&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.10397v3-abstract-full').style.display = 'inline'; document.getElementById('2212.10397v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.10397v3-abstract-full" style="display: none;"> To prevent the costly and inefficient use of resources on low-quality annotations, we want a method for creating a pool of dependable annotators who can effectively complete difficult tasks, such as evaluating automatic summarization. Thus, we investigate the recruitment of high-quality Amazon Mechanical Turk workers via a two-step pipeline. We show that we can successfully filter out subpar workers before they carry out the evaluations and obtain high-agreement annotations with similar constraints on resources. Although our workers demonstrate a strong consensus among themselves and CloudResearch workers, their alignment with expert judgments on a subset of the data is not as expected and needs further training in correctness. This paper still serves as a best practice for the recruitment of qualified annotators in other challenging annotation tasks. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.10397v3-abstract-full').style.display = 'none'; document.getElementById('2212.10397v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.06746">arXiv:2212.06746</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2212.06746">pdf</a>, <a href="https://arxiv.org/format/2212.06746">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1088/1748-3190/acd671">10.1088/1748-3190/acd671 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Pacific Lamprey Inspired Climbing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Van+Stratum%2C+B">Brian Van Stratum</a>, <a href="/search/cs?searchtype=author&amp;query=Shoele%2C+K">Kourosh Shoele</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+J+E">Jonathan E. 
Clark</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.06746v1-abstract-short" style="display: inline;"> Snakes and their bio-inspired robot counterparts have demonstrated locomotion on a wide range of terrains. However, dynamic vertical climbing is one locomotion strategy that has received little attention in the existing snake robotics literature. We demonstrate a new scansorial gait and robot inspired by the locomotion of the Pacific Lamprey. This new gait allows a robot to steer while climbing on&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.06746v1-abstract-full').style.display = 'inline'; document.getElementById('2212.06746v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.06746v1-abstract-full" style="display: none;"> Snakes and their bio-inspired robot counterparts have demonstrated locomotion on a wide range of terrains. However, dynamic vertical climbing is one locomotion strategy that has received little attention in the existing snake robotics literature. We demonstrate a new scansorial gait and robot inspired by the locomotion of the Pacific Lamprey. This new gait allows a robot to steer while climbing on flat, near-vertical surfaces. A reduced-order model is developed and used to explore the relationship between body actuation and vertical and lateral motions of the robot. Trident, the new wall climbing lamprey-inspired robot, demonstrates dynamic climbing on flat vertical surfaces with a peak net vertical stride displacement of 4.1 cm per step. Actuating at 1.3 Hz, Trident attains a vertical climbing speed of 4.8 cm/s (0.09 Bl/s) at specific resistance of 8.3. Trident can also traverse laterally at 9 cm/s (0.17 Bl/s). 
Moreover, Trident is able to make 14\% longer strides than the Pacific Lamprey when climbing vertically. The computational and experimental results demonstrate that a lamprey-inspired climbing gait coupled with appropriate attachment is a useful climbing strategy for snake robots climbing near vertical surfaces with limited push points. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.06746v1-abstract-full').style.display = 'none'; document.getElementById('2212.06746v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.00922">arXiv:2211.00922</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2211.00922">pdf</a>, <a href="https://arxiv.org/format/2211.00922">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Dialect-robust Evaluation of Generated Text </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Sun%2C+J">Jiao Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Sellam%2C+T">Thibault Sellam</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E">Elizabeth Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Vu%2C+T">Tu Vu</a>, <a href="/search/cs?searchtype=author&amp;query=Dozat%2C+T">Timothy Dozat</a>, <a href="/search/cs?searchtype=author&amp;query=Garrette%2C+D">Dan Garrette</a>, <a href="/search/cs?searchtype=author&amp;query=Siddhant%2C+A">Aditya Siddhant</a>, <a 
href="/search/cs?searchtype=author&amp;query=Eisenstein%2C+J">Jacob Eisenstein</a>, <a href="/search/cs?searchtype=author&amp;query=Gehrmann%2C+S">Sebastian Gehrmann</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.00922v1-abstract-short" style="display: inline;"> Evaluation metrics that are not robust to dialect variation make it impossible to tell how well systems perform for many groups of users, and can even penalize systems for producing text in lower-resource dialects. However, currently, there exists no way to quantify how metrics respond to change in the dialect of a generated utterance. We thus formalize dialect robustness and dialect awareness as&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.00922v1-abstract-full').style.display = 'inline'; document.getElementById('2211.00922v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.00922v1-abstract-full" style="display: none;"> Evaluation metrics that are not robust to dialect variation make it impossible to tell how well systems perform for many groups of users, and can even penalize systems for producing text in lower-resource dialects. However, currently, there exists no way to quantify how metrics respond to change in the dialect of a generated utterance. We thus formalize dialect robustness and dialect awareness as goals for NLG evaluation metrics. We introduce a suite of methods and corresponding statistical tests one can use to assess metrics in light of the two goals. Applying the suite to current state-of-the-art metrics, we demonstrate that they are not dialect-robust and that semantic perturbations frequently lead to smaller decreases in a metric than the introduction of dialect features. 
As a first step to overcome this limitation, we propose a training schema, NANO, which introduces regional and language information to the pretraining process of a metric. We demonstrate that NANO provides a size-efficient way for models to improve the dialect robustness while simultaneously improving their performance on the standard metric benchmark. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.00922v1-abstract-full').style.display = 'none'; document.getElementById('2211.00922v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.11249">arXiv:2206.11249</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2206.11249">pdf</a>, <a href="https://arxiv.org/format/2206.11249">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> GEMv2: Multilingual NLG Benchmarking in a Single Line of Code </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gehrmann%2C+S">Sebastian Gehrmann</a>, <a href="/search/cs?searchtype=author&amp;query=Bhattacharjee%2C+A">Abhik Bhattacharjee</a>, <a href="/search/cs?searchtype=author&amp;query=Mahendiran%2C+A">Abinaya Mahendiran</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+A">Alex 
Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Papangelis%2C+A">Alexandros Papangelis</a>, <a href="/search/cs?searchtype=author&amp;query=Madaan%2C+A">Aman Madaan</a>, <a href="/search/cs?searchtype=author&amp;query=McMillan-Major%2C+A">Angelina McMillan-Major</a>, <a href="/search/cs?searchtype=author&amp;query=Shvets%2C+A">Anna Shvets</a>, <a href="/search/cs?searchtype=author&amp;query=Upadhyay%2C+A">Ashish Upadhyay</a>, <a href="/search/cs?searchtype=author&amp;query=Yao%2C+B">Bingsheng Yao</a>, <a href="/search/cs?searchtype=author&amp;query=Wilie%2C+B">Bryan Wilie</a>, <a href="/search/cs?searchtype=author&amp;query=Bhagavatula%2C+C">Chandra Bhagavatula</a>, <a href="/search/cs?searchtype=author&amp;query=You%2C+C">Chaobin You</a>, <a href="/search/cs?searchtype=author&amp;query=Thomson%2C+C">Craig Thomson</a>, <a href="/search/cs?searchtype=author&amp;query=Garbacea%2C+C">Cristina Garbacea</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+D">Dakuo Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Deutsch%2C+D">Daniel Deutsch</a>, <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+D">Deyi Xiong</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+D">Di Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Gkatzia%2C+D">Dimitra Gkatzia</a>, <a href="/search/cs?searchtype=author&amp;query=Radev%2C+D">Dragomir Radev</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E">Elizabeth Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Durmus%2C+E">Esin Durmus</a>, <a href="/search/cs?searchtype=author&amp;query=Ladhak%2C+F">Faisal Ladhak</a>, <a href="/search/cs?searchtype=author&amp;query=Ginter%2C+F">Filip Ginter</a> , et al. 
(52 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.11249v3-abstract-short" style="display: inline;"> Evaluation in machine learning is usually informed by past choices, for example which datasets or metrics to use. This standardization enables the comparison on equal footing using leaderboards, but the evaluation choices become sub-optimal as better alternatives arise. This problem is especially pertinent in natural language generation which requires ever-improving suites of datasets, metrics, an&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.11249v3-abstract-full').style.display = 'inline'; document.getElementById('2206.11249v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.11249v3-abstract-full" style="display: none;"> Evaluation in machine learning is usually informed by past choices, for example which datasets or metrics to use. This standardization enables the comparison on equal footing using leaderboards, but the evaluation choices become sub-optimal as better alternatives arise. This problem is especially pertinent in natural language generation which requires ever-improving suites of datasets, metrics, and human evaluation to make definitive claims. To make following best model evaluation practices easier, we introduce GEMv2. The new version of the Generation, Evaluation, and Metrics Benchmark introduces a modular infrastructure for dataset, model, and metric developers to benefit from each others work. GEMv2 supports 40 documented datasets in 51 languages. Models for all datasets can be evaluated online and our interactive data card creation and rendering tools make it easier to add new datasets to the living benchmark. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.11249v3-abstract-full').style.display = 'none'; document.getElementById('2206.11249v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.06935">arXiv:2202.06935</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2202.06935">pdf</a>, <a href="https://arxiv.org/format/2202.06935">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Repairing the Cracked Foundation: A Survey of Obstacles in Evaluation Practices for Generated Text </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gehrmann%2C+S">Sebastian Gehrmann</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E">Elizabeth Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Sellam%2C+T">Thibault Sellam</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.06935v1-abstract-short" style="display: inline;"> Evaluation practices in natural language generation (NLG) have many known flaws, but improved 
evaluation approaches are rarely widely adopted. This issue has become more urgent, since neural NLG models have improved to the point where they can often no longer be distinguished based on the surface-level features that older metrics rely on. This paper surveys the issues with human and automatic mode&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.06935v1-abstract-full').style.display = 'inline'; document.getElementById('2202.06935v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.06935v1-abstract-full" style="display: none;"> Evaluation practices in natural language generation (NLG) have many known flaws, but improved evaluation approaches are rarely widely adopted. This issue has become more urgent, since neural NLG models have improved to the point where they can often no longer be distinguished based on the surface-level features that older metrics rely on. This paper surveys the issues with human and automatic model evaluations and with commonly used datasets in NLG that have been pointed out over the past 20 years. We summarize, categorize, and discuss how researchers have been addressing these issues and what their findings mean for the current state of model evaluations. Building on those insights, we lay out a long-term vision for NLG evaluation and propose concrete steps for researchers to improve their evaluation processes. Finally, we analyze 66 NLG papers from recent NLP conferences in how well they already follow these suggestions and identify which areas require more drastic changes to the status quo. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.06935v1-abstract-full').style.display = 'none'; document.getElementById('2202.06935v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2107.00061">arXiv:2107.00061</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2107.00061">pdf</a>, <a href="https://arxiv.org/format/2107.00061">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> All That&#39;s &#39;Human&#39; Is Not Gold: Evaluating Human Evaluation of Generated Text </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E">Elizabeth Clark</a>, <a href="/search/cs?searchtype=author&amp;query=August%2C+T">Tal August</a>, <a href="/search/cs?searchtype=author&amp;query=Serrano%2C+S">Sofia Serrano</a>, <a href="/search/cs?searchtype=author&amp;query=Haduong%2C+N">Nikita Haduong</a>, <a href="/search/cs?searchtype=author&amp;query=Gururangan%2C+S">Suchin Gururangan</a>, <a href="/search/cs?searchtype=author&amp;query=Smith%2C+N+A">Noah A. 
Smith</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2107.00061v2-abstract-short" style="display: inline;"> Human evaluations are typically considered the gold standard in natural language generation, but as models&#39; fluency improves, how well can evaluators detect and judge machine-generated text? We run a study assessing non-experts&#39; ability to distinguish between human- and machine-authored text (GPT2 and GPT3) in three domains (stories, news articles, and recipes). We find that, without training, eva&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.00061v2-abstract-full').style.display = 'inline'; document.getElementById('2107.00061v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2107.00061v2-abstract-full" style="display: none;"> Human evaluations are typically considered the gold standard in natural language generation, but as models&#39; fluency improves, how well can evaluators detect and judge machine-generated text? We run a study assessing non-experts&#39; ability to distinguish between human- and machine-authored text (GPT2 and GPT3) in three domains (stories, news articles, and recipes). We find that, without training, evaluators distinguished between GPT3- and human-authored text at random chance level. We explore three approaches for quickly training evaluators to better identify GPT3-authored text (detailed instructions, annotated examples, and paired examples) and find that while evaluators&#39; accuracy improved up to 55%, it did not significantly improve across the three domains. 
Given the inconsistent results across text domains and the often contradictory reasons evaluators gave for their judgments, we examine the role untrained human evaluations play in NLG evaluation and provide recommendations to NLG researchers for improving human evaluations of text generated from state-of-the-art models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.00061v2-abstract-full').style.display = 'none'; document.getElementById('2107.00061v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">references added, corrected typo</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2102.13476">arXiv:2102.13476</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2102.13476">pdf</a>, <a href="https://arxiv.org/format/2102.13476">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> PySensors: A Python Package for Sparse Sensor Placement </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=de+Silva%2C+B+M">Brian M. 
de Silva</a>, <a href="/search/cs?searchtype=author&amp;query=Manohar%2C+K">Krithika Manohar</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E">Emily Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Brunton%2C+B+W">Bingni W. Brunton</a>, <a href="/search/cs?searchtype=author&amp;query=Brunton%2C+S+L">Steven L. Brunton</a>, <a href="/search/cs?searchtype=author&amp;query=Kutz%2C+J+N">J. Nathan Kutz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2102.13476v1-abstract-short" style="display: inline;"> PySensors is a Python package for selecting and placing a sparse set of sensors for classification and reconstruction tasks. Specifically, PySensors implements algorithms for data-driven sparse sensor placement optimization for reconstruction (SSPOR) and sparse sensor placement optimization for classification (SSPOC). In this work we provide a brief description of the mathematical algorithms and t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.13476v1-abstract-full').style.display = 'inline'; document.getElementById('2102.13476v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2102.13476v1-abstract-full" style="display: none;"> PySensors is a Python package for selecting and placing a sparse set of sensors for classification and reconstruction tasks. Specifically, PySensors implements algorithms for data-driven sparse sensor placement optimization for reconstruction (SSPOR) and sparse sensor placement optimization for classification (SSPOC). In this work we provide a brief description of the mathematical algorithms and theory for sparse sensor optimization, along with an overview and demonstration of the features implemented in PySensors (with code examples). 
We also include practical advice for user and a list of potential extensions to PySensors. Software is available at https://github.com/dynamicslab/pysensors. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.13476v1-abstract-full').style.display = 'none'; document.getElementById('2102.13476v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2008.12247">arXiv:2008.12247</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2008.12247">pdf</a>, <a href="https://arxiv.org/format/2008.12247">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Bracketing brackets with bras and kets </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E">Emily Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Vincent%2C+A">Angelie Vincent</a>, <a href="/search/cs?searchtype=author&amp;query=Kutz%2C+J+N">J. Nathan Kutz</a>, <a href="/search/cs?searchtype=author&amp;query=Brunton%2C+S+L">Steven L. Brunton</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2008.12247v1-abstract-short" style="display: inline;"> Brackets are an essential component in aircraft manufacture and design, joining parts together, supporting weight, holding wires, and strengthening joints. 
Hundreds or thousands of unique brackets are used in every aircraft, but manufacturing a large number of distinct brackets is inefficient and expensive. Fortunately, many so-called &#34;different&#34; brackets are in fact very similar or even identical&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2008.12247v1-abstract-full').style.display = 'inline'; document.getElementById('2008.12247v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2008.12247v1-abstract-full" style="display: none;"> Brackets are an essential component in aircraft manufacture and design, joining parts together, supporting weight, holding wires, and strengthening joints. Hundreds or thousands of unique brackets are used in every aircraft, but manufacturing a large number of distinct brackets is inefficient and expensive. Fortunately, many so-called &#34;different&#34; brackets are in fact very similar or even identical to each other. In this manuscript, we present a data-driven framework for constructing a comparatively small group of representative brackets from a large catalog of current brackets, based on hierarchical clustering of bracket data. We find that for a modern commercial aircraft, the full set of brackets can be reduced by 30\% while still describing half of the test set sufficiently accurately. This approach is based on designing an inner product that quantifies a multi-objective similarity between two brackets, which are the &#34;bra&#34; and the &#34;ket&#34; of the inner product. Although we demonstrate this algorithm to reduce the number of brackets in aerospace manufacturing, it may be generally applied to any large-scale component standardization effort. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2008.12247v1-abstract-full').style.display = 'none'; document.getElementById('2008.12247v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 July, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 9 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2006.14799">arXiv:2006.14799</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2006.14799">pdf</a>, <a href="https://arxiv.org/format/2006.14799">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Evaluation of Text Generation: A Survey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Celikyilmaz%2C+A">Asli Celikyilmaz</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E">Elizabeth Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Gao%2C+J">Jianfeng Gao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2006.14799v2-abstract-short" style="display: inline;"> The paper surveys evaluation methods of natural language generation (NLG) systems that have been developed in the last few years. 
We group NLG evaluation methods into three categories: (1) human-centric evaluation metrics, (2) automatic metrics that require no training, and (3) machine-learned metrics. For each category, we discuss the progress that has been made and the challenges still being fac&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.14799v2-abstract-full').style.display = 'inline'; document.getElementById('2006.14799v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2006.14799v2-abstract-full" style="display: none;"> The paper surveys evaluation methods of natural language generation (NLG) systems that have been developed in the last few years. We group NLG evaluation methods into three categories: (1) human-centric evaluation metrics, (2) automatic metrics that require no training, and (3) machine-learned metrics. For each category, we discuss the progress that has been made and the challenges still being faced, with a focus on the evaluation of recently proposed NLG tasks and neural NLG models. We then present two examples for task-specific NLG evaluations for automatic text summarization and long text generation, and conclude the paper by proposing future research directions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.14799v2-abstract-full').style.display = 'none'; document.getElementById('2006.14799v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 May, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 June, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2020. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">47 pages (revised version)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2004.12129">arXiv:2004.12129</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2004.12129">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Physics and Society">physics.soc-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> Assessing the impact of the coronavirus lockdown on unhappiness, loneliness, and boredom using Google Trends </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Brodeur%2C+A">Abel Brodeur</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+A+E">Andrew E. Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Fleche%2C+S">Sarah Fleche</a>, <a href="/search/cs?searchtype=author&amp;query=Powdthavee%2C+N">Nattavudh Powdthavee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2004.12129v1-abstract-short" style="display: inline;"> The COVID-19 pandemic has led many governments to implement lockdowns. While lockdowns may help to contain the spread of the virus, it is possible that substantial damage to population well-being will result. This study relies on Google Trends data and tests whether the lockdowns implemented in Europe and America led to changes in well-being related topic search terms. 
Using different methods to e&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.12129v1-abstract-full').style.display = 'inline'; document.getElementById('2004.12129v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2004.12129v1-abstract-full" style="display: none;"> The COVID-19 pandemic has led many governments to implement lockdowns. While lockdowns may help to contain the spread of the virus, it is possible that substantial damage to population well-being will result. This study relies on Google Trends data and tests whether the lockdowns implemented in Europe and America led to changes in well-being related topic search terms. Using different methods to evaluate the causal effects of lockdown, we find a substantial increase in the search intensity for boredom in Europe and the US. We also found a significant increase in searches for loneliness, worry and sadness, while searches for stress, suicide and divorce on the contrary fell. Our results suggest that people&#39;s mental health may have been severely affected by the lockdown. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.12129v1-abstract-full').style.display = 'none'; document.getElementById('2004.12129v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 April, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2020. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2004.03607">arXiv:2004.03607</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2004.03607">pdf</a>, <a href="https://arxiv.org/format/2004.03607">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> TuringAdvice: A Generative and Dynamic Evaluation of Language Use </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zellers%2C+R">Rowan Zellers</a>, <a href="/search/cs?searchtype=author&amp;query=Holtzman%2C+A">Ari Holtzman</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E">Elizabeth Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Qin%2C+L">Lianhui Qin</a>, <a href="/search/cs?searchtype=author&amp;query=Farhadi%2C+A">Ali Farhadi</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Yejin Choi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2004.03607v2-abstract-short" style="display: inline;"> We propose TuringAdvice, a new challenge task and dataset for language understanding models. Given a written situation that a real person is currently facing, a model must generate helpful advice in natural language. Our evaluation framework tests a fundamental aspect of human language understanding: our ability to use language to resolve open-ended situations by communicating with each other. 
E&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.03607v2-abstract-full').style.display = 'inline'; document.getElementById('2004.03607v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2004.03607v2-abstract-full" style="display: none;"> We propose TuringAdvice, a new challenge task and dataset for language understanding models. Given a written situation that a real person is currently facing, a model must generate helpful advice in natural language. Our evaluation framework tests a fundamental aspect of human language understanding: our ability to use language to resolve open-ended situations by communicating with each other. Empirical results show that today&#39;s models struggle at TuringAdvice, even multibillion parameter models finetuned on 600k in-domain training examples. The best model, a finetuned T5, writes advice that is at least as helpful as human-written advice in only 14% of cases; a much larger non-finetunable GPT3 model does even worse at 4%. This low performance reveals language understanding errors that are hard to spot outside of a generative setting, showing much room for progress. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.03607v2-abstract-full').style.display = 'none'; document.getElementById('2004.03607v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 April, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NAACL 2021 camera ready. 
Project page at https://rowanzellers.com/advice</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1910.12978">arXiv:1910.12978</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1910.12978">pdf</a>, <a href="https://arxiv.org/format/1910.12978">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.3389/fvets.2020.00130">10.3389/fvets.2020.00130 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Effects of Social Cues on Biosecurity Compliance in Livestock Facilities: Evidence from Experimental Simulations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Trinity%2C+L">Luke Trinity</a>, <a href="/search/cs?searchtype=author&amp;query=Merrill%2C+S+C">Scott C. Merrill</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E">Eric Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Koliba%2C+C+J">Christopher J. Koliba</a>, <a href="/search/cs?searchtype=author&amp;query=Zia%2C+A">Asim Zia</a>, <a href="/search/cs?searchtype=author&amp;query=Bucini%2C+G">Gabriela Bucini</a>, <a href="/search/cs?searchtype=author&amp;query=Smith%2C+J+M">Julia M. Smith</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1910.12978v1-abstract-short" style="display: inline;"> Disease outbreaks in U.S. animal livestock industries have economic impacts measured in hundreds of millions of dollars per year. 
Biosecurity, or procedures intended to protect animals against disease, is known to be effective at reducing infection risk at facilities. Yet to the detriment of animal health, humans do not always follow biosecurity protocols. Human behavioral factors have been shown&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.12978v1-abstract-full').style.display = 'inline'; document.getElementById('1910.12978v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1910.12978v1-abstract-full" style="display: none;"> Disease outbreaks in U.S. animal livestock industries have economic impacts measured in hundreds of millions of dollars per year. Biosecurity, or procedures intended to protect animals against disease, is known to be effective at reducing infection risk at facilities. Yet to the detriment of animal health, humans do not always follow biosecurity protocols. Human behavioral factors have been shown to influence willingness to follow biosecurity protocols. Here we show how social cues may affect cooperation with a biosecurity practice. Participants were immersed in a simulated swine production facility through a graphical user interface and prompted to make a decision that addressed their willingness to comply with a biosecurity practice. We tested the effect of varying three experimental variables: (1) the risk of acquiring an infection, (2) the delivery method of the infection risk information (numerical versus graphical), and (3) behavior of an automated coworker in the facility. We provide evidence that participants changed their behavior when they observed a simulated worker making a choice to follow or not follow a biosecurity protocol, even though the simulated worker had no economic effect on the participants&#39; payouts. 
These results advance the understanding of human behavioral effects on biosecurity protocol decisions; demonstrating that social cues need to be considered by livestock facility managers when developing policies to make agricultural systems more disease resilient. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.12978v1-abstract-full').style.display = 'none'; document.getElementById('1910.12978v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 October, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">30 pages, 4 figures, 6 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1909.12905">arXiv:1909.12905</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1909.12905">pdf</a>, <a href="https://arxiv.org/format/1909.12905">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1371/journal.pone.0228983">10.1371/journal.pone.0228983 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Using Digital Field Experiments To Elicit Risk Mitigation Behavioral Strategies For Disease Management Across Agricultural Production Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/cs?searchtype=author&amp;query=Clark%2C+E+M">Eric M. Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Merrill%2C+S+C">Scott C. Merrill</a>, <a href="/search/cs?searchtype=author&amp;query=Trinity%2C+L">Luke Trinity</a>, <a href="/search/cs?searchtype=author&amp;query=Bucini%2C+G">Gabriela Bucini</a>, <a href="/search/cs?searchtype=author&amp;query=Cheney%2C+N">Nicholas Cheney</a>, <a href="/search/cs?searchtype=author&amp;query=Langle-Chimal%2C+O">Ollin Langle-Chimal</a>, <a href="/search/cs?searchtype=author&amp;query=Shrum%2C+T">Trisha Shrum</a>, <a href="/search/cs?searchtype=author&amp;query=Koliba%2C+C">Christopher Koliba</a>, <a href="/search/cs?searchtype=author&amp;query=Zia%2C+A">Asim Zia</a>, <a href="/search/cs?searchtype=author&amp;query=Smith%2C+J+M">Julia M. Smith</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1909.12905v2-abstract-short" style="display: inline;"> Failing to mitigate propagation of disease spread can result in dire economic consequences for agricultural networks. Pathogens like Porcine Epidemic Diarrhea virus, can quickly spread among producers. Biosecurity is designed to prevent infection transmission. When considering biosecurity investments, management must balance the cost of protection versus the consequences of contracting an infectio&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1909.12905v2-abstract-full').style.display = 'inline'; document.getElementById('1909.12905v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1909.12905v2-abstract-full" style="display: none;"> Failing to mitigate propagation of disease spread can result in dire economic consequences for agricultural networks. 
Pathogens like Porcine Epidemic Diarrhea virus, can quickly spread among producers. Biosecurity is designed to prevent infection transmission. When considering biosecurity investments, management must balance the cost of protection versus the consequences of contracting an infection. Thus, an examination of the decision making processes associated with investment in biosecurity is important for enhancing system wide biosecurity. Data gathered from digital field experiments can provide insights into behavioral strategies and inform the development of decision support systems. We created an online digital experiment to simulate outbreak scenarios among swine production supply chains, where participants were tasked with making biosecurity investment decisions. In Experiment One, we quantified the risk associated with each participant&#39;s decisions and delineated three dominant categories of risk attitudes: risk averse, risk tolerant, and opportunistic. Each risk class exhibited unique approaches in reaction to risk and disease information. We also tested how information uncertainty affects risk aversion, by varying the amount of visibility of the infection as well as the amount of biosecurity implemented across the system. We found evidence that more visibility in the number of infected sites increases risk averse behaviors, while more visibility in the amount of neighboring biosecurity increased risk taking behaviors. In Experiment Two, we were surprised to find no evidence for differences in behavior of livestock specialists compared to Amazon Mechanical Turk participants. Our findings provide support for using digital field experiments to study how risk communication affects behavior, which can provide insights towards more effective messaging strategies. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1909.12905v2-abstract-full').style.display = 'none'; document.getElementById('1909.12905v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 October, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 September, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1909.04076">arXiv:1909.04076</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1909.04076">pdf</a>, <a href="https://arxiv.org/format/1909.04076">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Counterfactual Story Reasoning and Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Qin%2C+L">Lianhui Qin</a>, <a href="/search/cs?searchtype=author&amp;query=Bosselut%2C+A">Antoine Bosselut</a>, <a href="/search/cs?searchtype=author&amp;query=Holtzman%2C+A">Ari Holtzman</a>, <a href="/search/cs?searchtype=author&amp;query=Bhagavatula%2C+C">Chandra Bhagavatula</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E">Elizabeth Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Yejin Choi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1909.04076v2-abstract-short" style="display: inline;"> 
Counterfactual reasoning requires predicting how alternative events, contrary to what actually happened, might have resulted in different outcomes. Despite being considered a necessary component of AI-complete systems, few resources have been developed for evaluating counterfactual reasoning in narratives. In this paper, we propose Counterfactual Story Rewriting: given an original story and an i&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1909.04076v2-abstract-full').style.display = 'inline'; document.getElementById('1909.04076v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1909.04076v2-abstract-full" style="display: none;"> Counterfactual reasoning requires predicting how alternative events, contrary to what actually happened, might have resulted in different outcomes. Despite being considered a necessary component of AI-complete systems, few resources have been developed for evaluating counterfactual reasoning in narratives. In this paper, we propose Counterfactual Story Rewriting: given an original story and an intervening counterfactual event, the task is to minimally revise the story to make it compatible with the given counterfactual event. Solving this task will require deep understanding of causal narrative chains and counterfactual invariance, and integration of such story reasoning capabilities into conditional language generation models. We present TimeTravel, a new dataset of 29,849 counterfactual rewritings, each with the original story, a counterfactual event, and human-generated revision of the original story compatible with the counterfactual event. Additionally, we include 80,115 counterfactual &#34;branches&#34; without a rewritten storyline to support future work on semi- or un-supervised approaches to counterfactual story rewriting. 
Finally, we evaluate the counterfactual rewriting capacities of several competitive baselines based on pretrained language models, and assess whether common overlap and model-based automatic metrics for text generation correlate well with human scores for counterfactual rewriting. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1909.04076v2-abstract-full').style.display = 'none'; document.getElementById('1909.04076v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 September, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 September, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to EMNLP 2019</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1805.09959">arXiv:1805.09959</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1805.09959">pdf</a>, <a href="https://arxiv.org/format/1805.09959">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> A Sentiment Analysis of Breast Cancer Treatment Experiences and Healthcare Perceptions Across Twitter </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E+M">Eric M. 
Clark</a>, <a href="/search/cs?searchtype=author&amp;query=James%2C+T">Ted James</a>, <a href="/search/cs?searchtype=author&amp;query=Jones%2C+C+A">Chris A. Jones</a>, <a href="/search/cs?searchtype=author&amp;query=Alapati%2C+A">Amulya Alapati</a>, <a href="/search/cs?searchtype=author&amp;query=Ukandu%2C+P">Promise Ukandu</a>, <a href="/search/cs?searchtype=author&amp;query=Danforth%2C+C+M">Christopher M. Danforth</a>, <a href="/search/cs?searchtype=author&amp;query=Dodds%2C+P+S">Peter Sheridan Dodds</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1805.09959v2-abstract-short" style="display: inline;"> Background: Social media has the capacity to afford the healthcare industry with valuable feedback from patients who reveal and express their medical decision-making process, as well as self-reported quality of life indicators both during and post treatment. In prior work, [Crannell et. al.], we have studied an active cancer patient population on Twitter and compiled a set of tweets describing the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1805.09959v2-abstract-full').style.display = 'inline'; document.getElementById('1805.09959v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1805.09959v2-abstract-full" style="display: none;"> Background: Social media has the capacity to afford the healthcare industry with valuable feedback from patients who reveal and express their medical decision-making process, as well as self-reported quality of life indicators both during and post treatment. In prior work, [Crannell et. al.], we have studied an active cancer patient population on Twitter and compiled a set of tweets describing their experience with this disease. 
We refer to these online public testimonies as &#34;Invisible Patient Reported Outcomes&#34; (iPROs), because they carry relevant indicators, yet are difficult to capture by conventional means of self-report. Methods: Our present study aims to identify tweets related to the patient experience as an additional informative tool for monitoring public health. Using Twitter&#39;s public streaming API, we compiled over 5.3 million &#34;breast cancer&#34; related tweets spanning September 2016 until mid December 2017. We combined supervised machine learning methods with natural language processing to sift tweets relevant to breast cancer patient experiences. We analyzed a sample of 845 breast cancer patient and survivor accounts, responsible for over 48,000 posts. We investigated tweet content with a hedonometric sentiment analysis to quantitatively extract emotionally charged topics. Results: We found that positive experiences were shared regarding patient treatment, raising support, and spreading awareness. Further discussions related to healthcare were prevalent and largely negative focusing on fear of political legislation that could result in loss of coverage. Conclusions: Social media can provide a positive outlet for patients to discuss their needs and concerns regarding their healthcare coverage and treatment needs. Capturing iPROs from online communication can help inform healthcare professionals and lead to more connected and personalized treatment regimens. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1805.09959v2-abstract-full').style.display = 'none'; document.getElementById('1805.09959v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 October, 2018; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 May, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2018. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1804.10202">arXiv:1804.10202</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1804.10202">pdf</a>, <a href="https://arxiv.org/format/1804.10202">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Sounding Board: A User-Centric and Content-Driven Social Chatbot </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Fang%2C+H">Hao Fang</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+H">Hao Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Sap%2C+M">Maarten Sap</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E">Elizabeth Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Holtzman%2C+A">Ari Holtzman</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Yejin Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Smith%2C+N+A">Noah A. 
Smith</a>, <a href="/search/cs?searchtype=author&amp;query=Ostendorf%2C+M">Mari Ostendorf</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1804.10202v1-abstract-short" style="display: inline;"> We present Sounding Board, a social chatbot that won the 2017 Amazon Alexa Prize. The system architecture consists of several components including spoken language processing, dialogue management, language generation, and content management, with emphasis on user-centric and content-driven design. We also share insights gained from large-scale online logs based on 160,000 conversations with real-wo&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1804.10202v1-abstract-full').style.display = 'inline'; document.getElementById('1804.10202v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1804.10202v1-abstract-full" style="display: none;"> We present Sounding Board, a social chatbot that won the 2017 Amazon Alexa Prize. The system architecture consists of several components including spoken language processing, dialogue management, language generation, and content management, with emphasis on user-centric and content-driven design. We also share insights gained from large-scale online logs based on 160,000 conversations with real-world users. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1804.10202v1-abstract-full').style.display = 'none'; document.getElementById('1804.10202v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 April, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2018. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 3 figures, NAACL 2018</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1802.06220">arXiv:1802.06220</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1802.06220">pdf</a>, <a href="https://arxiv.org/ps/1802.06220">ps</a>, <a href="https://arxiv.org/format/1802.06220">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Fusion of finite set distributions: Pointwise consistency and global cardinality </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=%C3%9Cney%2C+M">Murat Üney</a>, <a href="/search/cs?searchtype=author&amp;query=Houssineau%2C+J">Jérémie Houssineau</a>, <a href="/search/cs?searchtype=author&amp;query=Delande%2C+E">Emmanuel Delande</a>, <a href="/search/cs?searchtype=author&amp;query=Julier%2C+S+J">Simon J. Julier</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+D+E">Daniel E. 
Clark</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1802.06220v2-abstract-short" style="display: inline;"> A recent trend in distributed multi-sensor fusion is to use random finite set filters at the sensor nodes and fuse the filtered distributions algorithmically using their exponential mixture densities (EMDs). Fusion algorithms which extend the celebrated covariance intersection and consensus based approaches are such examples. In this article, we analyse the variational principle underlying EMDs an&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1802.06220v2-abstract-full').style.display = 'inline'; document.getElementById('1802.06220v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1802.06220v2-abstract-full" style="display: none;"> A recent trend in distributed multi-sensor fusion is to use random finite set filters at the sensor nodes and fuse the filtered distributions algorithmically using their exponential mixture densities (EMDs). Fusion algorithms which extend the celebrated covariance intersection and consensus based approaches are such examples. In this article, we analyse the variational principle underlying EMDs and show that the EMDs of finite set distributions do not necessarily lead to consistent fusion of cardinality distributions. Indeed, we demonstrate that these inconsistencies may occur with overwhelming probability in practice, through examples with Bernoulli, Poisson and independent identically distributed (IID) cluster processes. We prove that pointwise consistency of EMDs does not imply consistency in global cardinality and vice versa. 
Then, we redefine the variational problems underlying fusion and provide iterative solutions thereby establishing a framework that guarantees cardinality consistent fusion. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1802.06220v2-abstract-full').style.display = 'none'; document.getElementById('1802.06220v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 December, 2018; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 February, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted for publication in the IEEE Transactions on Aerospace and Electronics Systems</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1708.00842">arXiv:1708.00842</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1708.00842">pdf</a>, <a href="https://arxiv.org/ps/1708.00842">ps</a>, <a href="https://arxiv.org/format/1708.00842">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation">stat.CO</span> </div> </div> <p class="title is-5 mathjax"> Latent Parameter Estimation in Fusion Networks Using Separable 
Likelihoods </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Uney%2C+M">Murat Uney</a>, <a href="/search/cs?searchtype=author&amp;query=Mulgrew%2C+B">Bernard Mulgrew</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+D+E">Daniel E Clark</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1708.00842v2-abstract-short" style="display: inline;"> Multi-sensor state space models underpin fusion applications in networks of sensors. Estimation of latent parameters in these models has the potential to provide highly desirable capabilities such as network self-calibration. Conventional solutions to the problem pose difficulties in scaling with the number of sensors due to the joint multi-sensor filtering involved when evaluating the parameter l&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1708.00842v2-abstract-full').style.display = 'inline'; document.getElementById('1708.00842v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1708.00842v2-abstract-full" style="display: none;"> Multi-sensor state space models underpin fusion applications in networks of sensors. Estimation of latent parameters in these models has the potential to provide highly desirable capabilities such as network self-calibration. Conventional solutions to the problem pose difficulties in scaling with the number of sensors due to the joint multi-sensor filtering involved when evaluating the parameter likelihood. In this article, we propose a separable pseudo-likelihood which is a more accurate approximation compared to a previously proposed alternative under typical operating conditions. 
In addition, we consider using separable likelihoods in the presence of many objects and ambiguity in associating measurements with objects that originated them. To this end, we use a state space model with a hypothesis-based parameterisation, and develop an empirical Bayesian perspective in order to evaluate separable likelihoods on this model using local filtering. Bayesian inference with this likelihood is carried out using belief propagation on the associated pairwise Markov random field. We specify a particle algorithm for latent parameter estimation in a linear Gaussian state space model and demonstrate its efficacy for network self-calibration using measurements from non-cooperative targets in comparison with alternatives. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1708.00842v2-abstract-full').style.display = 'none'; document.getElementById('1708.00842v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 January, 2018; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 August, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2017. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted with minor revisions, IEEE Transactions on Signal and Information Processing Over Networks</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1611.02989">arXiv:1611.02989</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1611.02989">pdf</a>, <a href="https://arxiv.org/ps/1611.02989">ps</a>, <a href="https://arxiv.org/format/1611.02989">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Bayesian data assimilation based on a family of outer measures </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Houssineau%2C+J">Jeremie Houssineau</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+D+E">Daniel E. Clark</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1611.02989v1-abstract-short" style="display: inline;"> A flexible representation of uncertainty that remains within the standard framework of probabilistic measure theory is presented along with a study of its properties. This representation relies on a specific type of outer measure that is based on the measure of a supremum, hence combining additive and highly sub-additive components. 
It is shown that this type of outer measure enables the introduct&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1611.02989v1-abstract-full').style.display = 'inline'; document.getElementById('1611.02989v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1611.02989v1-abstract-full" style="display: none;"> A flexible representation of uncertainty that remains within the standard framework of probabilistic measure theory is presented along with a study of its properties. This representation relies on a specific type of outer measure that is based on the measure of a supremum, hence combining additive and highly sub-additive components. It is shown that this type of outer measure enables the introduction of intuitive concepts such as pullback and general data assimilation operations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1611.02989v1-abstract-full').style.display = 'none'; document.getElementById('1611.02989v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2016; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2016. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 60A10; 62C10 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1508.01843">arXiv:1508.01843</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1508.01843">pdf</a>, <a href="https://arxiv.org/format/1508.01843">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> Vaporous Marketing: Uncovering Pervasive Electronic Cigarette Advertisements on Twitter </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E+M">Eric M. Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Jones%2C+C+A">Chris A. Jones</a>, <a href="/search/cs?searchtype=author&amp;query=Williams%2C+J+R">Jake Ryland Williams</a>, <a href="/search/cs?searchtype=author&amp;query=Kurti%2C+A+N">Allison N. Kurti</a>, <a href="/search/cs?searchtype=author&amp;query=Nortotsky%2C+M+C">Michell Craig Nortotsky</a>, <a href="/search/cs?searchtype=author&amp;query=Danforth%2C+C+M">Christopher M. Danforth</a>, <a href="/search/cs?searchtype=author&amp;query=Dodds%2C+P+S">Peter Sheridan Dodds</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1508.01843v2-abstract-short" style="display: inline;"> Background: Twitter has become the &#34;wild-west&#34; of marketing and promotional strategies for advertisement agencies. Electronic cigarettes have been heavily marketed across Twitter feeds, offering discounts, &#34;kid-friendly&#34; flavors, algorithmically generated false testimonials, and free samples. 
Methods: All electronic cigarette keyword related tweets from a 10% sample of Twitter spanning January 2012&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1508.01843v2-abstract-full').style.display = 'inline'; document.getElementById('1508.01843v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1508.01843v2-abstract-full" style="display: none;"> Background: Twitter has become the &#34;wild-west&#34; of marketing and promotional strategies for advertisement agencies. Electronic cigarettes have been heavily marketed across Twitter feeds, offering discounts, &#34;kid-friendly&#34; flavors, algorithmically generated false testimonials, and free samples. Methods: All electronic cigarette keyword related tweets from a 10% sample of Twitter spanning January 2012 through December 2014 (approximately 850,000 total tweets) were identified and categorized as Automated or Organic by combining a keyword classification and a machine trained Human Detection algorithm. A sentiment analysis using Hedonometrics was performed on Organic tweets to quantify the change in consumer sentiments over time. Commercialized tweets were topically categorized with key phrasal pattern matching. Results: The overwhelming majority (80%) of tweets were classified as automated or promotional in nature. The majority of these tweets were coded as commercialized (83.65% in 2013), up to 33% of which offered discounts or free samples and appeared on over a billion Twitter feeds as impressions. The positivity of Organic (human) classified tweets has decreased over time (5.84 in 2013 to 5.77 in 2014) due to a relative increase in the negative words ban, tobacco, doesn&#39;t, drug, against, poison, tax and a relative decrease in the positive words like haha, good, cool. 
Automated tweets are more positive than organic (6.17 versus 5.84) due to a relative increase in the marketing words best, win, buy, sale, health, discount and a relative decrease in negative words like bad, hate, stupid, don&#39;t. Conclusions: Due to the youth presence on Twitter and the clinical uncertainty of the long-term health complications of electronic cigarette consumption, the protection of public health warrants scrutiny and potential regulation of social media marketing. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1508.01843v2-abstract-full').style.display = 'none'; document.getElementById('1508.01843v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 March, 2016; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 August, 2015; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2015. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1505.06750">arXiv:1505.06750</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1505.06750">pdf</a>, <a href="https://arxiv.org/format/1505.06750">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Physics and Society">physics.soc-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1073/pnas.1505647112">10.1073/pnas.1505647112 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Reply to Garcia et al.: Common mistakes in measuring frequency dependent word characteristics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dodds%2C+P+S">P. S. Dodds</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E+M">E. M. Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Desu%2C+S">S. Desu</a>, <a href="/search/cs?searchtype=author&amp;query=Frank%2C+M+R">M. R. Frank</a>, <a href="/search/cs?searchtype=author&amp;query=Reagan%2C+A+J">A. J. Reagan</a>, <a href="/search/cs?searchtype=author&amp;query=Williams%2C+J+R">J. R. Williams</a>, <a href="/search/cs?searchtype=author&amp;query=Mitchell%2C+L">L. Mitchell</a>, <a href="/search/cs?searchtype=author&amp;query=Harris%2C+K+D">K. D. Harris</a>, <a href="/search/cs?searchtype=author&amp;query=Kloumann%2C+I+M">I. M. Kloumann</a>, <a href="/search/cs?searchtype=author&amp;query=Bagrow%2C+J+P">J. P. Bagrow</a>, <a href="/search/cs?searchtype=author&amp;query=Megerdoomian%2C+K">K. 
Megerdoomian</a>, <a href="/search/cs?searchtype=author&amp;query=McMahon%2C+M+T">M. T. McMahon</a>, <a href="/search/cs?searchtype=author&amp;query=Tivnan%2C+B+F">B. F. Tivnan</a>, <a href="/search/cs?searchtype=author&amp;query=Danforth%2C+C+M">C. M. Danforth</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1505.06750v2-abstract-short" style="display: inline;"> We demonstrate that the concerns expressed by Garcia et al. are misplaced, due to (1) a misreading of our findings in [1]; (2) a widespread failure to examine and present words in support of asserted summary quantities based on word usage frequencies; and (3) a range of misconceptions about word usage frequency, word rank, and expert-constructed word lists. In particular, we show that the English&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1505.06750v2-abstract-full').style.display = 'inline'; document.getElementById('1505.06750v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1505.06750v2-abstract-full" style="display: none;"> We demonstrate that the concerns expressed by Garcia et al. are misplaced, due to (1) a misreading of our findings in [1]; (2) a widespread failure to examine and present words in support of asserted summary quantities based on word usage frequencies; and (3) a range of misconceptions about word usage frequency, word rank, and expert-constructed word lists. In particular, we show that the English component of our study compares well statistically with two related surveys, that no survey design influence is apparent, and that estimates of measurement error do not explain the positivity biases reported in our work and that of others. 
We further demonstrate that for the frequency dependence of positivity---of which we explored the nuances in great detail in [1]---Garcia et al. did not perform a reanalysis of our data---they instead carried out an analysis of a different, statistically improper data set and introduced a nonlinearity before performing linear regression. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1505.06750v2-abstract-full').style.display = 'none'; document.getElementById('1505.06750v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 May, 2015; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 May, 2015; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2015. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 2 figures, 1 table. Expanded version of reply appearing in PNAS 2015</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1505.04342">arXiv:1505.04342</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1505.04342">pdf</a>, <a href="https://arxiv.org/format/1505.04342">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Sifting Robotic from Organic Text: A Natural Language Approach for Detecting Automation on Twitter </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E+M">Eric M. 
Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Williams%2C+J+R">Jake Ryland Williams</a>, <a href="/search/cs?searchtype=author&amp;query=Jones%2C+C+A">Chris A. Jones</a>, <a href="/search/cs?searchtype=author&amp;query=Galbraith%2C+R+A">Richard A. Galbraith</a>, <a href="/search/cs?searchtype=author&amp;query=Danforth%2C+C+M">Christopher M. Danforth</a>, <a href="/search/cs?searchtype=author&amp;query=Dodds%2C+P+S">Peter Sheridan Dodds</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1505.04342v6-abstract-short" style="display: inline;"> Twitter, a popular social media outlet, has evolved into a vast source of linguistic data, rich with opinion, sentiment, and discussion. Due to the increasing popularity of Twitter, its perceived potential for exerting social influence has led to the rise of a diverse community of automatons, commonly referred to as bots. These inorganic and semi-organic Twitter entities can range from the benevol&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1505.04342v6-abstract-full').style.display = 'inline'; document.getElementById('1505.04342v6-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1505.04342v6-abstract-full" style="display: none;"> Twitter, a popular social media outlet, has evolved into a vast source of linguistic data, rich with opinion, sentiment, and discussion. Due to the increasing popularity of Twitter, its perceived potential for exerting social influence has led to the rise of a diverse community of automatons, commonly referred to as bots. These inorganic and semi-organic Twitter entities can range from the benevolent (e.g., weather-update bots, help-wanted-alert bots) to the malevolent (e.g., spamming messages, advertisements, or radical opinions). 
Existing detection algorithms typically leverage meta-data (time between tweets, number of followers, etc.) to identify robotic accounts. Here, we present a powerful classification scheme that exclusively uses the natural language text from organic users to provide a criterion for identifying accounts posting automated messages. Since the classifier operates on text alone, it is flexible and may be applied to any textual data beyond the Twitter-sphere. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1505.04342v6-abstract-full').style.display = 'none'; document.getElementById('1505.04342v6-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 June, 2016; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 May, 2015; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2015. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1503.02120">arXiv:1503.02120</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1503.02120">pdf</a>, <a href="https://arxiv.org/format/1503.02120">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Identifying missing dictionary entries with frequency-conserving context models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Williams%2C+J+R">Jake Ryland Williams</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E+M">Eric M. Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Bagrow%2C+J+P">James P. Bagrow</a>, <a href="/search/cs?searchtype=author&amp;query=Danforth%2C+C+M">Christopher M. Danforth</a>, <a href="/search/cs?searchtype=author&amp;query=Dodds%2C+P+S">Peter Sheridan Dodds</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1503.02120v3-abstract-short" style="display: inline;"> In an effort to better understand meaning from natural language texts, we explore methods aimed at organizing lexical objects into contexts. A number of these methods for organization fall into a family defined by word ordering. Unlike demographic or spatial partitions of data, these collocation models are of special importance for their universal applicability. 
While we are interested here in tex&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1503.02120v3-abstract-full').style.display = 'inline'; document.getElementById('1503.02120v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1503.02120v3-abstract-full" style="display: none;"> In an effort to better understand meaning from natural language texts, we explore methods aimed at organizing lexical objects into contexts. A number of these methods for organization fall into a family defined by word ordering. Unlike demographic or spatial partitions of data, these collocation models are of special importance for their universal applicability. While we are interested here in text and have framed our treatment appropriately, our work is potentially applicable to other areas of research (e.g., speech, genomics, and mobility patterns) where one has ordered categorical data (e.g., sounds, genes, and locations). Our approach focuses on the phrase (whether word or larger) as the primary meaning-bearing lexical unit and object of study. To do so, we employ our previously developed framework for generating word-conserving phrase-frequency data. Upon training our model with the Wiktionary---an extensive, online, collaborative, and open-source dictionary that contains over 100,000 phrasal-definitions---we develop highly effective filters for the identification of meaningful, missing phrase-entries. With our predictions we then engage the editorial community of the Wiktionary and propose short lists of potential missing entries for definition, developing a breakthrough lexical extraction technique, and expanding our knowledge of the defined English lexicon of phrases. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1503.02120v3-abstract-full').style.display = 'none'; document.getElementById('1503.02120v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 July, 2015; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 March, 2015; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2015. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 6 figures, and 7 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1406.5181">arXiv:1406.5181</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1406.5181">pdf</a>, <a href="https://arxiv.org/format/1406.5181">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Physics and Society">physics.soc-ph</span> </div> </div> <p class="title is-5 mathjax"> Zipf&#39;s law holds for phrases, not words </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Williams%2C+J+R">Jake Ryland Williams</a>, <a href="/search/cs?searchtype=author&amp;query=Lessard%2C+P+R">Paul R. Lessard</a>, <a href="/search/cs?searchtype=author&amp;query=Desu%2C+S">Suma Desu</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E">Eric Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Bagrow%2C+J+P">James P. Bagrow</a>, <a href="/search/cs?searchtype=author&amp;query=Danforth%2C+C+M">Christopher M. 
Danforth</a>, <a href="/search/cs?searchtype=author&amp;query=Dodds%2C+P+S">Peter Sheridan Dodds</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1406.5181v2-abstract-short" style="display: inline;"> With Zipf&#39;s law being originally and most famously observed for word frequency, it is surprisingly limited in its applicability to human language, holding over no more than three to four orders of magnitude before hitting a clear break in scaling. Here, building on the simple observation that phrases of one or more words comprise the most coherent units of meaning in language, we show empirically&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1406.5181v2-abstract-full').style.display = 'inline'; document.getElementById('1406.5181v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1406.5181v2-abstract-full" style="display: none;"> With Zipf&#39;s law being originally and most famously observed for word frequency, it is surprisingly limited in its applicability to human language, holding over no more than three to four orders of magnitude before hitting a clear break in scaling. Here, building on the simple observation that phrases of one or more words comprise the most coherent units of meaning in language, we show empirically that Zipf&#39;s law for phrases extends over as many as nine orders of rank magnitude. In doing so, we develop a principled and scalable statistical mechanical method of random text partitioning, which opens up a rich frontier of rigorous text analysis via a rank ordering of mixed length phrases. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1406.5181v2-abstract-full').style.display = 'none'; document.getElementById('1406.5181v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 March, 2015; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 June, 2014; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2014. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Manuscript: 6 pages, 3 figures; Supplementary Information: 8 pages, 18 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1406.3855">arXiv:1406.3855</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1406.3855">pdf</a>, <a href="https://arxiv.org/format/1406.3855">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Physics and Society">physics.soc-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> Human language reveals a universal positivity bias </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dodds%2C+P+S">Peter Sheridan Dodds</a>, <a href="/search/cs?searchtype=author&amp;query=Clark%2C+E+M">Eric M. Clark</a>, <a href="/search/cs?searchtype=author&amp;query=Desu%2C+S">Suma Desu</a>, <a href="/search/cs?searchtype=author&amp;query=Frank%2C+M+R">Morgan R. 
Frank</a>, <a href="/search/cs?searchtype=author&amp;query=Reagan%2C+A+J">Andrew J. Reagan</a>, <a href="/search/cs?searchtype=author&amp;query=Williams%2C+J+R">Jake Ryland Williams</a>, <a href="/search/cs?searchtype=author&amp;query=Mitchell%2C+L">Lewis Mitchell</a>, <a href="/search/cs?searchtype=author&amp;query=Harris%2C+K+D">Kameron Decker Harris</a>, <a href="/search/cs?searchtype=author&amp;query=Kloumann%2C+I+M">Isabel M. Kloumann</a>, <a href="/search/cs?searchtype=author&amp;query=Bagrow%2C+J+P">James P. Bagrow</a>, <a href="/search/cs?searchtype=author&amp;query=Megerdoomian%2C+K">Karine Megerdoomian</a>, <a href="/search/cs?searchtype=author&amp;query=McMahon%2C+M+T">Matthew T. McMahon</a>, <a href="/search/cs?searchtype=author&amp;query=Tivnan%2C+B+F">Brian F. Tivnan</a>, <a href="/search/cs?searchtype=author&amp;query=Danforth%2C+C+M">Christopher M. Danforth</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1406.3855v1-abstract-short" style="display: inline;"> Using human evaluation of 100,000 words spread across 24 corpora in 10 languages diverse in origin and culture, we present evidence of a deep imprint of human sociality in language, observing that (1) the words of natural human language possess a universal positivity bias; (2) the estimated emotional content of words is consistent between languages under translation; and (3) this positivity bias i&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1406.3855v1-abstract-full').style.display = 'inline'; document.getElementById('1406.3855v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1406.3855v1-abstract-full" style="display: none;"> Using human evaluation of 100,000 words spread across 24 corpora in 10 languages diverse in origin and culture, we present 
evidence of a deep imprint of human sociality in language, observing that (1) the words of natural human language possess a universal positivity bias; (2) the estimated emotional content of words is consistent between languages under translation; and (3) this positivity bias is strongly independent of frequency of word usage. Alongside these general regularities, we describe inter-language variations in the emotional spectrum of languages which allow us to rank corpora. We also show how our word evaluations can be used to construct physical-like instruments for both real-time and offline measurement of the emotional content of large-scale texts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1406.3855v1-abstract-full').style.display = 'none'; document.getElementById('1406.3855v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 June, 2014; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2014. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Manuscript: 7 pages, 4 figures; Supplementary Material: 49 pages, 43 figures, 6 tables. 
Online appendices available at http://www.uvm.edu/storylab/share/papers/dodds2014a/</span> </p> </li> </ol> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main>
<!--
  Site footer: secondary navigation split into two "MetaColumn" groups.
  MetaColumn 1: About / Help and Contact / Subscribe.
  MetaColumn 2: Copyright / Privacy Policy and accessibility / operational status links.
  Every inline SVG icon in this footer sits next to visible link text that already names
  the destination, so the icons are decorative and carry aria-hidden="true".
  Links that open a new tab carry rel="noopener" so the opened page gets no
  window.opener handle back to this page.
-->
<footer>
  <div class="columns is-desktop" role="navigation" aria-label="Secondary">
    <!-- MetaColumn 1 -->
    <div class="column">
      <div class="columns">
        <div class="column">
          <ul class="nav-spaced">
            <li><a href="https://info.arxiv.org/about">About</a></li>
            <li><a href="https://info.arxiv.org/help">Help</a></li>
          </ul>
        </div>
        <div class="column">
          <ul class="nav-spaced">
            <li>
              <!-- decorative envelope icon; the adjacent link text is the accessible name -->
              <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation" aria-hidden="true"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>
              <a href="https://info.arxiv.org/help/contact.html"> Contact</a>
            </li>
            <li>
              <!-- decorative paper-plane icon; the adjacent link text is the accessible name -->
              <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation" aria-hidden="true"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg>
              <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a>
            </li>
          </ul>
        </div>
      </div>
    </div>
    <!-- end MetaColumn 1 -->
    <!-- MetaColumn 2 -->
    <div class="column">
      <div class="columns">
        <div class="column">
          <ul class="nav-spaced">
            <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li>
            <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li>
          </ul>
        </div>
        <div class="column sorry-app-links">
          <ul class="nav-spaced">
            <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li>
            <li>
              <p class="help">
                <!-- opens in a new tab; rel="noopener" severs window.opener -->
                <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank" rel="noopener">arXiv Operational Status
                  <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation" aria-hidden="true"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br>
                Get status notifications via
                <!-- third-party subscription pages, opened in a new tab without an opener reference -->
                <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank" rel="noopener"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation" aria-hidden="true"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a>
                or
                <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank" rel="noopener"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation" aria-hidden="true"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a>
              </p>
            </li>
          </ul>
        </div>
      </div>
    </div>
    <!-- end MetaColumn 2 -->
  </div>
</footer>
<script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script>
</body>
</html>

Pages: 1 2 3 4 5 6 7 8 9 10