Search | arXiv e-print repository
Showing 1–18 of 18 results for author: Kuehl, B
1. arXiv:2407.16148 [cs.CL]
   CHIME: LLM-Assisted Hierarchical Organization of Scientific Studies for Literature Review Support
   Authors: Chao-Chun Hsu, Erin Bransom, Jenna Sparks, Bailey Kuehl, Chenhao Tan, David Wadden, Lucy Lu Wang, Aakanksha Naik
   Abstract: Literature review requires researchers to synthesize a large amount of information and is increasingly challenging as the scientific literature expands. In this work, we investigate the potential of LLMs for producing hierarchical organizations of scientific studies to assist researchers with literature review. We define hierarchical organizations as tree structures where nodes refer to topical categories and every node is linked to the studies assigned to that category. Our naive LLM-based pipeline for hierarchy generation from a set of studies produces promising yet imperfect hierarchies, motivating us to collect CHIME, an expert-curated dataset for this task focused on biomedicine. Given the challenging and time-consuming nature of building hierarchies from scratch, we use a human-in-the-loop process in which experts correct errors (both links between categories and study assignment) in LLM-generated hierarchies. CHIME contains 2,174 LLM-generated hierarchies covering 472 topics, and expert-corrected hierarchies for a subset of 100 topics. Expert corrections allow us to quantify LLM performance, and we find that while LLMs are quite good at generating and organizing categories, their assignment of studies to categories could be improved. We attempt to train a corrector model with human feedback, which improves study assignment by 12.6 F1 points. We release our dataset and models to encourage research on developing better assistive tools for literature review.
   Submitted 22 July, 2024; originally announced July 2024.
   Comments: 2024 ACL Findings
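   The tree structure described in this abstract (topical category nodes, each linked to its assigned studies) can be pictured with a small data structure. The sketch below is purely illustrative; the class name, fields, and study identifiers are hypothetical and not taken from the CHIME release.

```python
# Illustrative sketch of a category hierarchy: nodes are topical categories,
# each linked to the studies assigned to it. Names and IDs are hypothetical.
from dataclasses import dataclass, field
from typing import List


@dataclass
class CategoryNode:
    name: str                                            # topical category label
    studies: List[str] = field(default_factory=list)     # e.g. study identifiers
    children: List["CategoryNode"] = field(default_factory=list)


def count_studies(node: CategoryNode) -> int:
    """Total number of study assignments in the subtree rooted at node."""
    return len(node.studies) + sum(count_studies(c) for c in node.children)


root = CategoryNode("Interventions for chronic pain")
root.children.append(CategoryNode("Physiotherapy", studies=["study-111", "study-222"]))
root.children.append(CategoryNode("Pharmacological", studies=["study-333"]))
print(count_studies(root))  # 3
```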
2. arXiv:2406.08446 [cs.CL, cs.AI]
   OLMES: A Standard for Language Model Evaluations
   Authors: Yuling Gu, Oyvind Tafjord, Bailey Kuehl, Dany Haddad, Jesse Dodge, Hannaneh Hajishirzi
   Abstract: Progress in AI is often demonstrated by new models claiming improved performance on tasks measuring model capabilities. Evaluating language models can be particularly challenging, as choices of how a model is evaluated on a task can lead to large changes in measured performance. There is no common standard setup, so different models are evaluated on the same tasks in different ways, leading to claims about which models perform best not being reproducible. We propose OLMES, a completely documented, practical, open standard for reproducible LLM evaluations. In developing this standard, we identify and review the varying factors in evaluation practices adopted by the community - such as details of prompt formatting, choice of in-context examples, probability normalizations, and task formulation. In particular, OLMES supports meaningful comparisons between smaller base models that require the unnatural "cloze" formulation of multiple-choice questions against larger models that can utilize the original formulation. OLMES includes well-considered, documented recommendations guided by results from existing literature as well as new experiments resolving open questions.
   Submitted 11 February, 2025; v1 submitted 12 June, 2024; originally announced June 2024.
   Comments: Findings of NAACL 2025
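   As a rough illustration of the "cloze" versus original multiple-choice formulations contrasted in this abstract, the snippet below builds both prompt styles for a toy question. The templates are invented for illustration and are not the OLMES specification.

```python
# Two ways to pose the same multiple-choice question to a language model.
question = "What gas do plants primarily absorb during photosynthesis?"
choices = ["Oxygen", "Carbon dioxide", "Nitrogen", "Hydrogen"]

# "Cloze" formulation: each candidate answer is appended to the question and the
# model's (typically normalized) probability of that continuation is compared.
cloze_prompts = [f"Question: {question}\nAnswer: {c}" for c in choices]

# Original multiple-choice formulation: all options are shown with letters and
# the model is asked to output the letter of the correct answer.
mcf_prompt = (
    f"Question: {question}\n"
    + "\n".join(f"{letter}. {c}" for letter, c in zip("ABCD", choices))
    + "\nAnswer:"
)

print(cloze_prompts[1])
print(mcf_prompt)
```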
3. arXiv:2403.03866 [cs.CL]
   KIWI: A Dataset of Knowledge-Intensive Writing Instructions for Answering Research Questions
   Authors: Fangyuan Xu, Kyle Lo, Luca Soldaini, Bailey Kuehl, Eunsol Choi, David Wadden
   Abstract: Large language models (LLMs) adapted to follow user instructions are now widely deployed as conversational agents. In this work, we examine one increasingly common instruction-following task: providing writing assistance to compose a long-form answer. To evaluate the capabilities of current LLMs on this task, we construct KIWI, a dataset of knowledge-intensive writing instructions in the scientific domain. Given a research question, an initial model-generated answer and a set of relevant papers, an expert annotator iteratively issues instructions for the model to revise and improve its answer. We collect 1,260 interaction turns from 234 interaction sessions with three state-of-the-art LLMs. Each turn includes a user instruction, a model response, and a human evaluation of the model response. Through a detailed analysis of the collected responses, we find that all models struggle to incorporate new information into an existing answer, and to perform precise and unambiguous edits. Further, we find that models struggle to judge whether their outputs successfully followed user instructions, with accuracy at least 10 points short of human agreement. Our findings indicate that KIWI will be a valuable resource to measure progress and improve LLMs' instruction-following capabilities for knowledge-intensive writing tasks.
   Submitted 6 March, 2024; originally announced March 2024.

4. arXiv:2311.09736 [cs.CL]
   CARE: Extracting Experimental Findings From Clinical Literature
   Authors: Aakanksha Naik, Bailey Kuehl, Erin Bransom, Doug Downey, Tom Hope
   Abstract: Extracting fine-grained experimental findings from literature can provide dramatic utility for scientific applications. Prior work has developed annotation schemas and datasets for limited aspects of this problem, failing to capture the real-world complexity and nuance required. Focusing on biomedicine, this work presents CARE -- a new IE dataset for the task of extracting clinical findings. We develop a new annotation schema capturing fine-grained findings as n-ary relations between entities and attributes, which unifies phenomena challenging for current IE systems such as discontinuous entity spans, nested relations, variable arity n-ary relations and numeric results in a single schema. We collect extensive annotations for 700 abstracts from two sources: clinical trials and case reports. We also demonstrate the generalizability of our schema to the computer science and materials science domains. We benchmark state-of-the-art IE systems on CARE, showing that even models such as GPT4 struggle. We release our resources to advance research on extracting and aggregating literature findings.
   Submitted 24 April, 2024; v1 submitted 16 November, 2023; originally announced November 2023.
   Comments: To appear at NAACL Findings 2024

5. arXiv:2306.12587 [cs.CL]
   ARIES: A Corpus of Scientific Paper Edits Made in Response to Peer Reviews
   Authors: Mike D'Arcy, Alexis Ross, Erin Bransom, Bailey Kuehl, Jonathan Bragg, Tom Hope, Doug Downey
   Abstract: We introduce the task of automatically revising scientific papers based on peer feedback and release ARIES, a dataset of review comments and their corresponding paper edits. The data is drawn from real reviewer-author interactions from computer science, and we provide labels linking each reviewer comment to the specific paper edits made by the author in response. We automatically create a high-precision silver training set, as well as an expert-labeled test set that shows high inter-annotator agreement. In experiments with 10 models covering the state of the art, we find that they struggle even to identify which edits correspond to a comment -- especially when the relationship between the edit and the comment is indirect and requires reasoning to uncover. We also extensively analyze GPT-4's ability to generate edits given a comment and the original paper. We find that it often succeeds on a superficial level, but tends to rigidly follow the wording of the feedback rather than the underlying intent, and lacks technical details compared to human-written edits.
   Submitted 5 August, 2024; v1 submitted 21 June, 2023; originally announced June 2023.
   Comments: ACL 2024, 10 pages, 2 figures

6. arXiv:2305.13693 [cs.CL]
   Automated Metrics for Medical Multi-Document Summarization Disagree with Human Evaluations
   Authors: Lucy Lu Wang, Yulia Otmakhova, Jay DeYoung, Thinh Hung Truong, Bailey E. Kuehl, Erin Bransom, Byron C. Wallace
   Abstract: Evaluating multi-document summarization (MDS) quality is difficult. This is especially true in the case of MDS for biomedical literature reviews, where models must synthesize contradicting evidence reported across different documents. Prior work has shown that rather than performing the task, models may exploit shortcuts that are difficult to detect using standard n-gram similarity metrics such as ROUGE. Better automated evaluation metrics are needed, but few resources exist to assess metrics when they are proposed. Therefore, we introduce a dataset of human-assessed summary quality facets and pairwise preferences to encourage and support the development of better automated evaluation methods for literature review MDS. We take advantage of community submissions to the Multi-document Summarization for Literature Review (MSLR) shared task to compile a diverse and representative sample of generated summaries. We analyze how automated summarization evaluation metrics correlate with lexical features of generated summaries, with other automated metrics including several we propose in this work, and with aspects of human-assessed summary quality. We find that not only do automated metrics fail to capture aspects of quality as assessed by humans, but in many cases the system rankings produced by these metrics are anti-correlated with rankings according to human annotators.
   Submitted 23 May, 2023; originally announced May 2023.
   Comments: ACL 2023; Github: https://github.com/allenai/mslr-annotated-dataset
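   For reference, the n-gram similarity scoring mentioned in the abstract (e.g., ROUGE) can be computed with the open-source rouge-score package; the texts below are invented examples, and this is not the MSLR shared-task evaluation pipeline.

```python
# Compute ROUGE between a reference summary and a generated one.
# Requires: pip install rouge-score
from rouge_score import rouge_scorer

reference = "The treatment significantly reduced symptoms compared with placebo."
generated = "Compared with placebo, the treatment reduced symptoms."

scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)
scores = scorer.score(reference, generated)

for name, s in scores.items():
    print(f"{name}: precision={s.precision:.2f} recall={s.recall:.2f} f1={s.fmeasure:.2f}")
```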
7. arXiv:2305.00366 [cs.CL, cs.IR, cs.LG]
   S2abEL: A Dataset for Entity Linking from Scientific Tables
   Authors: Yuze Lou, Bailey Kuehl, Erin Bransom, Sergey Feldman, Aakanksha Naik, Doug Downey
   Abstract: Entity linking (EL) is the task of linking a textual mention to its corresponding entry in a knowledge base, and is critical for many knowledge-intensive NLP applications. When applied to tables in scientific papers, EL is a step toward large-scale scientific knowledge bases that could enable advanced scientific question answering and analytics. We present the first dataset for EL in scientific tables. EL for scientific tables is especially challenging because scientific knowledge bases can be very incomplete, and disambiguating table mentions typically requires understanding the paper's text in addition to the table. Our dataset, S2abEL, focuses on EL in machine learning results tables and includes hand-labeled cell types, attributed sources, and entity links from the PaperswithCode taxonomy for 8,429 cells from 732 tables. We introduce a neural baseline method designed for EL on scientific tables containing many out-of-knowledge-base mentions, and show that it significantly outperforms a state-of-the-art generic table EL method. The best baselines fall below human performance, and our analysis highlights avenues for improvement.
   Submitted 29 April, 2023; originally announced May 2023.

8. arXiv:2303.14334 [cs.HC, cs.AI, cs.CL]
   The Semantic Reader Project: Augmenting Scholarly Documents through AI-Powered Interactive Reading Interfaces
   Authors: Kyle Lo, Joseph Chee Chang, Andrew Head, Jonathan Bragg, Amy X. Zhang, Cassidy Trier, Chloe Anastasiades, Tal August, Russell Authur, Danielle Bragg, Erin Bransom, Isabel Cachola, Stefan Candra, Yoganand Chandrasekhar, Yen-Sung Chen, Evie Yu-Yen Cheng, Yvonne Chou, Doug Downey, Rob Evans, Raymond Fok, Fangzhou Hu, Regan Huff, Dongyeop Kang, Tae Soo Kim, Rodney Kinney, et al. (30 additional authors not shown)
   Abstract: Scholarly publications are key to the transfer of knowledge from scholars to others. However, research papers are information-dense, and as the volume of the scientific literature grows, the need for new technology to support the reading process grows. In contrast to the process of finding papers, which has been transformed by Internet technology, the experience of reading research papers has changed little in decades. The PDF format for sharing research papers is widely used due to its portability, but it has significant downsides including static content, poor accessibility for low-vision readers, and difficulty reading on mobile devices. This paper explores the question "Can recent advances in AI and HCI power intelligent, interactive, and accessible reading interfaces -- even for legacy PDFs?" We describe the Semantic Reader Project, a collaborative effort across multiple institutions to explore automatic creation of dynamic reading interfaces for research papers. Through this project, we've developed ten research prototype interfaces and conducted usability studies with more than 300 participants and real-world users showing improved reading experiences for scholars. We've also released a production reading interface for research papers that will incorporate the best features as they mature. We structure this paper around challenges scholars and the public face when reading research papers -- Discovery, Efficiency, Comprehension, Synthesis, and Accessibility -- and present an overview of our progress and remaining open challenges.
   Submitted 23 April, 2023; v1 submitted 24 March, 2023; originally announced March 2023.
9. arXiv:2301.13298 [cs.CL]
   LongEval: Guidelines for Human Evaluation of Faithfulness in Long-form Summarization
   Authors: Kalpesh Krishna, Erin Bransom, Bailey Kuehl, Mohit Iyyer, Pradeep Dasigi, Arman Cohan, Kyle Lo
   Abstract: While human evaluation remains best practice for accurately judging the faithfulness of automatically-generated summaries, few solutions exist to address the increased difficulty and workload when evaluating long-form summaries. Through a survey of 162 papers on long-form summarization, we first shed light on current human evaluation practices surrounding long-form summaries. We find that 73% of these papers do not perform any human evaluation on model-generated summaries, while other works face new difficulties that manifest when dealing with long documents (e.g., low inter-annotator agreement). Motivated by our survey, we present LongEval, a set of guidelines for human evaluation of faithfulness in long-form summaries that addresses the following challenges: (1) How can we achieve high inter-annotator agreement on faithfulness scores? (2) How can we minimize annotator workload while maintaining accurate faithfulness scores? and (3) Do humans benefit from automated alignment between summary and source snippets? We deploy LongEval in annotation studies on two long-form summarization datasets in different domains (SQuALITY and PubMed), and we find that switching to a finer granularity of judgment (e.g., clause-level) reduces inter-annotator variance in faithfulness scores (e.g., std-dev from 18.5 to 6.8). We also show that scores from a partial annotation of fine-grained units highly correlate with scores from a full annotation workload (0.89 Kendall's tau using 50% judgments). We release our human judgments, annotation templates, and our software as a Python library for future research.
   Submitted 30 January, 2023; originally announced January 2023.
   Comments: EACL 2023 camera ready. Code and data can be found at https://github.com/martiansideofthemoon/longeval-summarization
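   The 0.89 Kendall's tau quoted above is a rank correlation between scoring runs; a minimal sketch of computing such a number with SciPy follows, using invented per-system scores rather than the paper's data.

```python
# Rank correlation between scores from a full annotation and a partial one.
# The score lists are hypothetical, for illustration only.
from scipy.stats import kendalltau

full_annotation_scores = [0.82, 0.61, 0.45, 0.90, 0.33]
partial_annotation_scores = [0.79, 0.58, 0.50, 0.88, 0.30]

tau, p_value = kendalltau(full_annotation_scores, partial_annotation_scores)
print(f"Kendall's tau = {tau:.2f} (p = {p_value:.3f})")
```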
10. arXiv:2301.10140 [cs.DL, cs.CL]
   The Semantic Scholar Open Data Platform
   Authors: Rodney Kinney, Chloe Anastasiades, Russell Authur, Iz Beltagy, Jonathan Bragg, Alexandra Buraczynski, Isabel Cachola, Stefan Candra, Yoganand Chandrasekhar, Arman Cohan, Miles Crawford, Doug Downey, Jason Dunkelberger, Oren Etzioni, Rob Evans, Sergey Feldman, Joseph Gorney, David Graham, Fangzhou Hu, Regan Huff, Daniel King, Sebastian Kohlmeier, Bailey Kuehl, Michael Langan, Daniel Lin, et al. (23 additional authors not shown)
   Abstract: The volume of scientific output is creating an urgent need for automated tools to help scientists keep up with developments in their field. Semantic Scholar (S2) is an open data platform and website aimed at accelerating science by helping scholars discover and understand scientific literature. We combine public and proprietary data sources using state-of-the-art techniques for scholarly PDF content extraction and automatic knowledge graph construction to build the Semantic Scholar Academic Graph, the largest open scientific literature graph to date, with 200M+ papers, 80M+ authors, 550M+ paper-authorship edges, and 2.4B+ citation edges. The graph includes advanced semantic features such as structurally parsed text, natural language summaries, and vector embeddings. In this paper, we describe the components of the S2 data processing pipeline and the associated APIs offered by the platform. We will update this living document to reflect changes as we add new data offerings and improve existing services.
   Submitted 24 January, 2023; originally announced January 2023.
   Comments: 8 pages, 6 figures
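   Among the APIs the abstract refers to is the public Semantic Scholar Graph API; a minimal paper-search request might look like the sketch below (endpoint and field names as publicly documented at the time of writing, so details and rate limits may change).

```python
# Query the Semantic Scholar Graph API for papers matching a keyword search.
import requests

resp = requests.get(
    "https://api.semanticscholar.org/graph/v1/paper/search",
    params={"query": "scientific claim verification", "fields": "title,year", "limit": 5},
    timeout=30,
)
resp.raise_for_status()
for paper in resp.json().get("data", []):
    print(paper.get("year"), paper.get("title"))
```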
In addition, analysis of the evidence in SciFact-Open reveals interesting phenomena likely to appear when claim verification systems are deployed in practice, e.g., cases where the evidence supports only a special case of the claim. Our dataset is available at https://github.com/dwadden/scifact-open. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.13777v1-abstract-full').style.display = 'none'; document.getElementById('2210.13777v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP Findings 2022. GitHub: https://github.com/dwadden/scifact-open-2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.06982">arXiv:2205.06982</a> <span> [<a href="https://arxiv.org/pdf/2205.06982">pdf</a>, <a href="https://arxiv.org/format/2205.06982">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> ACCoRD: A Multi-Document Approach to Generating Diverse Descriptions of Scientific Concepts </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Murthy%2C+S+K">Sonia K. Murthy</a>, <a href="/search/?searchtype=author&query=Lo%2C+K">Kyle Lo</a>, <a href="/search/?searchtype=author&query=King%2C+D">Daniel King</a>, <a href="/search/?searchtype=author&query=Bhagavatula%2C+C">Chandra Bhagavatula</a>, <a href="/search/?searchtype=author&query=Kuehl%2C+B">Bailey Kuehl</a>, <a href="/search/?searchtype=author&query=Johnson%2C+S">Sophie Johnson</a>, <a href="/search/?searchtype=author&query=Borchardt%2C+J">Jonathan Borchardt</a>, <a href="/search/?searchtype=author&query=Weld%2C+D+S">Daniel S. Weld</a>, <a href="/search/?searchtype=author&query=Hope%2C+T">Tom Hope</a>, <a href="/search/?searchtype=author&query=Downey%2C+D">Doug Downey</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.06982v1-abstract-short" style="display: inline;"> Systems that can automatically define unfamiliar terms hold the promise of improving the accessibility of scientific texts, especially for readers who may lack prerequisite background knowledge. However, current systems assume a single "best" description per concept, which fails to account for the many potentially useful ways a concept can be described. 
arXiv:2205.06982 [pdf, other] (https://arxiv.org/abs/2205.06982)
Subjects: cs.CL (Computation and Language); cs.AI (Artificial Intelligence); cs.HC (Human-Computer Interaction)
ACCoRD: A Multi-Document Approach to Generating Diverse Descriptions of Scientific Concepts
Authors: Sonia K. Murthy, Kyle Lo, Daniel King, Chandra Bhagavatula, Bailey Kuehl, Sophie Johnson, Jonathan Borchardt, Daniel S. Weld, Tom Hope, Doug Downey
Abstract: Systems that can automatically define unfamiliar terms hold the promise of improving the accessibility of scientific texts, especially for readers who may lack prerequisite background knowledge. However, current systems assume a single "best" description per concept, which fails to account for the many potentially useful ways a concept can be described. We present ACCoRD, an end-to-end system tackling the novel task of generating sets of descriptions of scientific concepts. Our system takes advantage of the myriad ways a concept is mentioned across the scientific literature to produce distinct, diverse descriptions of target scientific concepts in terms of different reference concepts. To support research on the task, we release an expert-annotated resource, the ACCoRD corpus, which includes 1,275 labeled contexts and 1,787 hand-authored concept descriptions. We conduct a user study demonstrating that (1) users prefer descriptions produced by our end-to-end system, and (2) users prefer multiple descriptions to a single "best" description.
Submitted 14 May, 2022; originally announced May 2022.

arXiv:2203.12990 [pdf, other] (https://arxiv.org/abs/2203.12990)
Subjects: cs.CL (Computation and Language)
Generating Scientific Claims for Zero-Shot Scientific Fact Checking
Authors: Dustin Wright, David Wadden, Kyle Lo, Bailey Kuehl, Arman Cohan, Isabelle Augenstein, Lucy Lu Wang
Abstract: Automated scientific fact checking is difficult due to the complexity of scientific language and a lack of significant amounts of training data, as annotation requires domain expertise. To address this challenge, we propose scientific claim generation, the task of generating one or more atomic and verifiable claims from scientific sentences, and demonstrate its usefulness in zero-shot fact checking for biomedical claims. We propose CLAIMGEN-BART, a new supervised method for generating claims supported by the literature, as well as KBIN, a novel method for generating claim negations. Additionally, we adapt an existing unsupervised entity-centric method of claim generation to biomedical claims, which we call CLAIMGEN-ENTITY. Experiments on zero-shot fact checking demonstrate that both CLAIMGEN-ENTITY and CLAIMGEN-BART, coupled with KBIN, achieve up to 90% of the performance of fully supervised models trained on manually annotated claims and evidence. A rigorous evaluation study demonstrates significant improvement in generated claim and negation quality over existing baselines.
Submitted 24 March, 2022; originally announced March 2022.
Comments: Accepted to ACL 2022; 13 pages, 3 figures, 8 tables
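CLAIMGEN-BART is described above as a supervised sequence-to-sequence method that rewrites a scientific sentence into an atomic, verifiable claim. The sketch below shows only the general shape of such a setup with the Hugging Face transformers library; the checkpoint is a generic BART model standing in for the authors' fine-tuned weights, and the example sentence is invented:

    from transformers import BartForConditionalGeneration, BartTokenizer

    # Generic BART checkpoint as a placeholder; a CLAIMGEN-style model would be
    # fine-tuned on (sentence, claim) pairs before generation is meaningful.
    model_name = "facebook/bart-base"
    tokenizer = BartTokenizer.from_pretrained(model_name)
    model = BartForConditionalGeneration.from_pretrained(model_name)

    sentence = ("Treatment with drug X reduced tumor growth in mice, "
                "suggesting a possible therapeutic effect.")
    inputs = tokenizer(sentence, return_tensors="pt", truncation=True)
    claim_ids = model.generate(**inputs, num_beams=4, max_length=64)
    print(tokenizer.decode(claim_ids[0], skip_special_tokens=True))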
arXiv:2108.13751 [pdf, other] (https://arxiv.org/abs/2108.13751)
Subjects: cs.CL (Computation and Language); cs.HC (Human-Computer Interaction); cs.IR (Information Retrieval)
A Search Engine for Discovery of Scientific Challenges and Directions
Authors: Dan Lahav, Jon Saad Falcon, Bailey Kuehl, Sophie Johnson, Sravanthi Parasa, Noam Shomron, Duen Horng Chau, Diyi Yang, Eric Horvitz, Daniel S. Weld, Tom Hope
Abstract: Keeping track of scientific challenges, advances and emerging directions is a fundamental part of research. However, researchers face a flood of papers that hinders discovery of important knowledge. In biomedicine, this directly impacts human lives. To address this problem, we present a novel task of extraction and search of scientific challenges and directions, to facilitate rapid knowledge discovery. We construct and release an expert-annotated corpus of texts sampled from full-length papers, labeled with novel semantic categories that generalize across many types of challenges and directions. We focus on a large corpus of interdisciplinary work relating to the COVID-19 pandemic, ranging from biomedicine to areas such as AI and economics. We apply a model trained on our data to identify challenges and directions across the corpus and build a dedicated search engine. In experiments with 19 researchers and clinicians using our system, we outperform a popular scientific search engine in assisting knowledge discovery. Finally, we show that models trained on our resource generalize to the wider biomedical domain and to AI papers, highlighting its broad utility. We make our data, model and search engine publicly available at https://challenges.apps.allenai.org/.
Submitted 19 January, 2022; v1 submitted 31 August, 2021; originally announced August 2021.
Comments: AAAI 2022
Journal ref: AAAI 2022

arXiv:2107.00414 [pdf, other] (https://arxiv.org/abs/2107.00414)
Subjects: cs.CL (Computation and Language)
MultiCite: Modeling realistic citations requires moving beyond the single-sentence single-label setting
Authors: Anne Lauscher, Brandon Ko, Bailey Kuehl, Sophie Johnson, David Jurgens, Arman Cohan, Kyle Lo
Abstract: Citation context analysis (CCA) is an important task in natural language processing that studies how and why scholars discuss each other's work. Despite decades of study, traditional frameworks for CCA have largely relied on overly simplistic assumptions of how authors cite, which ignore several important phenomena. For instance, scholarly papers often contain rich discussions of cited work that span multiple sentences and express multiple intents concurrently. Yet, CCA is typically approached as a single-sentence, single-label classification task, and thus existing datasets fail to capture this interesting discourse. In our work, we address this research gap by proposing a novel framework for CCA as a document-level context extraction and labeling task. We release MultiCite, a new dataset of 12,653 citation contexts from over 1,200 computational linguistics papers. Not only is it the largest collection of expert-annotated citation contexts to date, MultiCite contains multi-sentence, multi-label citation contexts within full paper texts. Finally, we demonstrate how our dataset, while still usable for training classic CCA models, also supports the development of new types of models for CCA beyond fixed-width text classification. We release our code and dataset at https://github.com/allenai/multicite.
Submitted 31 July, 2021; v1 submitted 1 July, 2021; originally announced July 2021.
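The framing above treats a citation context as a set of possibly non-adjacent sentences carrying several intent labels at once, rather than one sentence with one label. A small illustration of that data shape (the field and label names here are invented for the example; MultiCite defines its own annotation scheme):

    from dataclasses import dataclass

    # Illustrative intent labels only; not the MultiCite label set.
    INTENTS = ["background", "motivation", "uses", "extends", "compares", "future_work"]

    @dataclass
    class CitationContext:
        citing_paper: str
        cited_paper: str
        sentence_indices: list[int]   # may span multiple, non-adjacent sentences
        intents: list[str]            # may carry several labels at once

    def to_multilabel_vector(ctx: CitationContext) -> list[int]:
        """Encode a (possibly multi-sentence) context as a multi-label target."""
        return [1 if label in ctx.intents else 0 for label in INTENTS]

    ctx = CitationContext("P123", "Smith2020", sentence_indices=[4, 5, 9],
                          intents=["uses", "compares"])
    print(to_multilabel_vector(ctx))   # [0, 0, 1, 0, 1, 0]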
arXiv:2106.00676 [pdf, other] (https://arxiv.org/abs/2106.00676)
Subjects: cs.CL (Computation and Language); cs.CV (Computer Vision and Pattern Recognition)
VILA: Improving Structured Content Extraction from Scientific PDFs Using Visual Layout Groups
Authors: Zejiang Shen, Kyle Lo, Lucy Lu Wang, Bailey Kuehl, Daniel S. Weld, Doug Downey
Abstract: Accurately extracting structured content from PDFs is a critical first step for NLP over scientific papers. Recent work has improved extraction accuracy by incorporating elementary layout information, e.g., each token's 2D position on the page, into language model pretraining. We introduce new methods that explicitly model VIsual LAyout (VILA) groups, i.e., text lines or text blocks, to further improve performance. In our I-VILA approach, we show that simply inserting special tokens denoting layout group boundaries into model inputs can lead to a 1.9% Macro F1 improvement in token classification. In the H-VILA approach, we show that hierarchical encoding of layout groups can result in up to 47% inference time reduction with less than 0.8% Macro F1 loss. Unlike prior layout-aware approaches, our methods do not require expensive additional pretraining, only fine-tuning, which we show can reduce training cost by up to 95%. Experiments are conducted on a newly curated evaluation suite, S2-VLUE, that unifies existing automatically-labeled datasets and includes a new dataset of manual annotations covering diverse papers from 19 scientific disciplines. Pre-trained weights, benchmark datasets, and source code are available at https://github.com/allenai/VILA.
Submitted 5 January, 2022; v1 submitted 1 June, 2021; originally announced June 2021.
Comments: To appear in TACL 2022. The arXiv version is a pre-MIT Press publication version. (17 pages, 5 figures, 9 tables)
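The I-VILA idea summarized in the abstract is a preprocessing step: mark each visual layout group boundary with a special token before the token sequence reaches the language model. A toy sketch of that step (the boundary token name and grouping are illustrative; the actual implementation is in the VILA repository linked above):

    def insert_layout_boundary_tokens(token_groups, boundary_token="[BLK]"):
        """Flatten layout groups (e.g., text blocks) into one token sequence,
        inserting a special token at each group boundary, per the I-VILA idea."""
        tokens = []
        for i, group in enumerate(token_groups):
            if i > 0:
                tokens.append(boundary_token)
            tokens.extend(group)
        return tokens

    # Two layout groups from a PDF page: a section-header block and a body-text block.
    groups = [["4", "Experiments"], ["We", "evaluate", "on", "S2-VLUE", "."]]
    print(insert_layout_boundary_tokens(groups))
    # ['4', 'Experiments', '[BLK]', 'We', 'evaluate', 'on', 'S2-VLUE', '.']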
arXiv:2105.00076 [pdf, other] (https://arxiv.org/abs/2105.00076)
Subjects: cs.DL (Digital Libraries); cs.HC (Human-Computer Interaction)
Improving the Accessibility of Scientific Documents: Current State, User Needs, and a System Solution to Enhance Scientific PDF Accessibility for Blind and Low Vision Users
Authors: Lucy Lu Wang, Isabel Cachola, Jonathan Bragg, Evie Yu-Yen Cheng, Chelsea Haupt, Matt Latzke, Bailey Kuehl, Madeleine van Zuylen, Linda Wagner, Daniel S. Weld
Abstract: The majority of scientific papers are distributed in PDF, which poses challenges for accessibility, especially for blind and low vision (BLV) readers. We characterize the scope of this problem by assessing the accessibility of 11,397 PDFs published 2010-2019 sampled across various fields of study, finding that only 2.4% of these PDFs satisfy all of our defined accessibility criteria. We introduce the SciA11y system to offset some of the issues around inaccessibility. SciA11y incorporates several machine learning models to extract the content of scientific PDFs and render this content as accessible HTML, with added novel navigational features to support screen reader users. An intrinsic evaluation of extraction quality indicates that the majority of HTML renders (87%) produced by our system have no or only some readability issues. We perform a qualitative user study to understand the needs of BLV researchers when reading papers, and to assess whether the SciA11y system could address these needs. We summarize our user study findings into a set of five design recommendations for accessible scientific reader systems. User response to SciA11y was positive, with all users saying they would be likely to use the system in the future, and some stating that the system, if available, would become their primary workflow. We successfully produce HTML renders for over 12M papers, of which an open access subset of 1.5M are available for browsing at https://scia11y.org/.
Submitted 30 April, 2021; originally announced May 2021.
Comments: 44 pages, 11 figures, 10 tables, 4 appendices; accessible PDF is available at https://llwang.net/publications/2021_wang_scia11y.pdf

arXiv:2104.06486 [pdf, other] (https://arxiv.org/abs/2104.06486)
Subjects: cs.CL (Computation and Language); cs.AI (Artificial Intelligence); cs.LG (Machine Learning)
MS^2: Multi-Document Summarization of Medical Studies
Authors: Jay DeYoung, Iz Beltagy, Madeleine van Zuylen, Bailey Kuehl, Lucy Lu Wang
Abstract: To assess the effectiveness of any medical intervention, researchers must conduct a time-intensive and highly manual literature review. NLP systems can help to automate or assist in parts of this expensive process. In support of this goal, we release MS^2 (Multi-Document Summarization of Medical Studies), a dataset of over 470k documents and 20k summaries derived from the scientific literature. This dataset facilitates the development of systems that can assess and aggregate contradictory evidence across multiple studies, and is the first large-scale, publicly available multi-document summarization dataset in the biomedical domain. We experiment with a summarization system based on BART, with promising early results. We formulate our summarization inputs and targets in both free text and structured forms and modify a recently proposed metric to assess the quality of our system's generated summaries. Data and models are available at https://github.com/allenai/ms2.
Submitted 22 November, 2021; v1 submitted 13 April, 2021; originally announced April 2021.
Comments: 8 pages of content, 20 pages including references and appendix. See https://github.com/allenai/ms2/ for code, https://ai2-s2-ms2.s3-us-west-2.amazonaws.com/ms_data_2021-04-12.zip for data (1.8G, zipped). Published in EMNLP 2021: https://aclanthology.org/2021.emnlp-main.594/
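The MS^2 abstract describes a BART-based summarizer whose input is a set of study abstracts for a review question. A minimal sketch of that setup with a generic BART checkpoint (the checkpoint, separator convention, and example texts are placeholders, not the released MS^2 models or data):

    from transformers import BartForConditionalGeneration, BartTokenizer

    model_name = "facebook/bart-large-cnn"   # placeholder; not an MS^2 checkpoint
    tokenizer = BartTokenizer.from_pretrained(model_name)
    model = BartForConditionalGeneration.from_pretrained(model_name)

    question = "Does exercise therapy reduce chronic low back pain?"
    study_abstracts = [
        "Trial A: 120 patients; exercise group reported lower pain scores at 12 weeks.",
        "Trial B: no significant difference between exercise and usual care at 6 months.",
    ]
    # Concatenating the review question with each study abstract is one simple way
    # to linearize a multi-document input for a sequence-to-sequence model.
    source = question + " </s> " + " </s> ".join(study_abstracts)

    inputs = tokenizer(source, return_tensors="pt", truncation=True, max_length=1024)
    summary_ids = model.generate(**inputs, num_beams=4, max_length=128)
    print(tokenizer.decode(summary_ids[0], skip_special_tokens=True))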