Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 76 results for author: <span class="mathjax">Naik, A</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Naik%2C+A">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Naik, A"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Naik%2C+A&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Naik, A"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Naik%2C+A&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Naik%2C+A&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Naik%2C+A&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13966">arXiv:2502.13966</a> <span> [<a href="https://arxiv.org/pdf/2502.13966">pdf</a>, <a href="https://arxiv.org/format/2502.13966">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Where's the Bug? Attention Probing for Scalable Fault Localization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Stein%2C+A">Adam Stein</a>, <a href="/search/cs?searchtype=author&query=Wayne%2C+A">Arthur Wayne</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Aaditya Naik</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+M">Mayur Naik</a>, <a href="/search/cs?searchtype=author&query=Wong%2C+E">Eric Wong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13966v2-abstract-short" style="display: inline;"> Ensuring code correctness remains a challenging problem even as large language models (LLMs) become increasingly capable at code-related tasks. While LLM-based program repair systems can propose bug fixes using only a user's bug report, their effectiveness is fundamentally limited by their ability to perform fault localization (FL), a challenging problem for both humans and LLMs. Existing FL appro… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13966v2-abstract-full').style.display = 'inline'; document.getElementById('2502.13966v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13966v2-abstract-full" style="display: none;"> Ensuring code correctness remains a challenging problem even as large language models (LLMs) become increasingly capable at code-related tasks. While LLM-based program repair systems can propose bug fixes using only a user's bug report, their effectiveness is fundamentally limited by their ability to perform fault localization (FL), a challenging problem for both humans and LLMs. Existing FL approaches rely on executable test cases, require training on costly and often noisy line-level annotations, or demand resource-intensive LLMs. In this paper, we present Bug Attention Probe (BAP), a method which learns state-of-the-art fault localization without any direct localization labels, outperforming traditional FL baselines and prompting of large-scale LLMs. We evaluate our approach across a variety of code settings, including real-world Java bugs from the standard Defects4J dataset as well as seven other datasets which span a diverse set of bug types and languages. Averaged across all eight datasets, BAP improves by 34.6% top-1 accuracy compared to the strongest baseline and 93.4% over zero-shot prompting GPT-4o. BAP is also significantly more efficient than prompting, outperforming large open-weight models at a small fraction of the computational cost. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13966v2-abstract-full').style.display = 'none'; document.getElementById('2502.13966v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.16524">arXiv:2501.16524</a> <span> [<a href="https://arxiv.org/pdf/2501.16524">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Programming by Examples Meets Historical Linguistics: A Large Language Model Based Approach to Sound Law Induction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Naik%2C+A">Atharva Naik</a>, <a href="/search/cs?searchtype=author&query=Agrawal%2C+D">Darsh Agrawal</a>, <a href="/search/cs?searchtype=author&query=Sng%2C+H">Hong Sng</a>, <a href="/search/cs?searchtype=author&query=Marr%2C+C">Clayton Marr</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+K">Kexun Zhang</a>, <a href="/search/cs?searchtype=author&query=Robinson%2C+N+R">Nathaniel R Robinson</a>, <a href="/search/cs?searchtype=author&query=Chang%2C+K">Kalvin Chang</a>, <a href="/search/cs?searchtype=author&query=Byrnes%2C+R">Rebecca Byrnes</a>, <a href="/search/cs?searchtype=author&query=Mysore%2C+A">Aravind Mysore</a>, <a href="/search/cs?searchtype=author&query=Rose%2C+C">Carolyn Rose</a>, <a href="/search/cs?searchtype=author&query=Mortensen%2C+D+R">David R Mortensen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.16524v1-abstract-short" style="display: inline;"> Historical linguists have long written "programs" that convert reconstructed words in an ancestor language into their attested descendants via ordered string rewrite functions (called sound laws) However, writing these programs is time-consuming, motivating the development of automated Sound Law Induction (SLI) which we formulate as Programming by Examples (PBE) with Large Language Models (LLMs) i… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16524v1-abstract-full').style.display = 'inline'; document.getElementById('2501.16524v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.16524v1-abstract-full" style="display: none;"> Historical linguists have long written "programs" that convert reconstructed words in an ancestor language into their attested descendants via ordered string rewrite functions (called sound laws) However, writing these programs is time-consuming, motivating the development of automated Sound Law Induction (SLI) which we formulate as Programming by Examples (PBE) with Large Language Models (LLMs) in this paper. While LLMs have been effective for code generation, recent work has shown that PBE is challenging but improvable by fine-tuning, especially with training data drawn from the same distribution as evaluation data. In this paper, we create a conceptual framework of what constitutes a "similar distribution" for SLI and propose four kinds of synthetic data generation methods with varying amounts of inductive bias to investigate what leads to the best performance. Based on the results we create a SOTA open-source model for SLI as PBE (+6% pass rate with a third of the parameters of the second-best LLM) and also highlight exciting future directions for PBE research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16524v1-abstract-full').style.display = 'none'; document.getElementById('2501.16524v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.11198">arXiv:2501.11198</a> <span> [<a href="https://arxiv.org/pdf/2501.11198">pdf</a>, <a href="https://arxiv.org/format/2501.11198">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Energy-Efficient Satellite IoT Optical Downlinks Using Weather-Adaptive Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fettes%2C+E">Ethan Fettes</a>, <a href="/search/cs?searchtype=author&query=Madoery%2C+P+G">Pablo G. Madoery</a>, <a href="/search/cs?searchtype=author&query=Yanikomeroglu%2C+H">Halim Yanikomeroglu</a>, <a href="/search/cs?searchtype=author&query=Karabulut-Kurt%2C+G">Gunes Karabulut-Kurt</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Abhishek Naik</a>, <a href="/search/cs?searchtype=author&query=Bellinger%2C+C">Colin Bellinger</a>, <a href="/search/cs?searchtype=author&query=Martel%2C+S">Stephane Martel</a>, <a href="/search/cs?searchtype=author&query=Ahmed%2C+K">Khaled Ahmed</a>, <a href="/search/cs?searchtype=author&query=Siddiqui%2C+S">Sameera Siddiqui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.11198v1-abstract-short" style="display: inline;"> Internet of Things (IoT) devices have become increasingly ubiquitous with applications not only in urban areas but remote areas as well. These devices support industries such as agriculture, forestry, and resource extraction. Due to the device location being in remote areas, satellites are frequently used to collect and deliver IoT device data to customers. As these devices become increasingly adv… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.11198v1-abstract-full').style.display = 'inline'; document.getElementById('2501.11198v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.11198v1-abstract-full" style="display: none;"> Internet of Things (IoT) devices have become increasingly ubiquitous with applications not only in urban areas but remote areas as well. These devices support industries such as agriculture, forestry, and resource extraction. Due to the device location being in remote areas, satellites are frequently used to collect and deliver IoT device data to customers. As these devices become increasingly advanced and numerous, the amount of data produced has rapidly increased potentially straining the ability for radio frequency (RF) downlink capacity. Free space optical communications with their wide available bandwidths and high data rates are a potential solution, but these communication systems are highly vulnerable to weather-related disruptions. This results in certain communication opportunities being inefficient in terms of the amount of data received versus the power expended. In this paper, we propose a deep reinforcement learning (DRL) method using Deep Q-Networks that takes advantage of weather condition forecasts to improve energy efficiency while delivering the same number of packets as schemes that don't factor weather into routing decisions. We compare this method with simple approaches that utilize simple cloud cover thresholds to improve energy efficiency. In testing the DRL approach provides improved median energy efficiency without a significant reduction in median delivery ratio. Simple cloud cover thresholds were also found to be effective but the thresholds with the highest energy efficiency had reduced median delivery ratio values. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.11198v1-abstract-full').style.display = 'none'; document.getElementById('2501.11198v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.01331">arXiv:2412.01331</a> <span> [<a href="https://arxiv.org/pdf/2412.01331">pdf</a>, <a href="https://arxiv.org/format/2412.01331">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Exploring Long-Term Prediction of Type 2 Diabetes Microvascular Complications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Remfry%2C+E">Elizabeth Remfry</a>, <a href="/search/cs?searchtype=author&query=Henkin%2C+R">Rafael Henkin</a>, <a href="/search/cs?searchtype=author&query=Barnes%2C+M+R">Michael R Barnes</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Aakanksha Naik</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.01331v1-abstract-short" style="display: inline;"> Electronic healthcare records (EHR) contain a huge wealth of data that can support the prediction of clinical outcomes. EHR data is often stored and analysed using clinical codes (ICD10, SNOMED), however these can differ across registries and healthcare providers. Integrating data across systems involves mapping between different clinical ontologies requiring domain expertise, and at times resulti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.01331v1-abstract-full').style.display = 'inline'; document.getElementById('2412.01331v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.01331v1-abstract-full" style="display: none;"> Electronic healthcare records (EHR) contain a huge wealth of data that can support the prediction of clinical outcomes. EHR data is often stored and analysed using clinical codes (ICD10, SNOMED), however these can differ across registries and healthcare providers. Integrating data across systems involves mapping between different clinical ontologies requiring domain expertise, and at times resulting in data loss. To overcome this, code-agnostic models have been proposed. We assess the effectiveness of a code-agnostic representation approach on the task of long-term microvascular complication prediction for individuals living with Type 2 Diabetes. Our method encodes individual EHRs as text using fine-tuned, pretrained clinical language models. Leveraging large-scale EHR data from the UK, we employ a multi-label approach to simultaneously predict the risk of microvascular complications across 1-, 5-, and 10-year windows. We demonstrate that a code-agnostic approach outperforms a code-based model and illustrate that performance is better with longer prediction windows but is biased to the first occurring complication. Overall, we highlight that context length is vitally important for model performance. This study highlights the possibility of including data from across different clinical ontologies and is a starting point for generalisable clinical models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.01331v1-abstract-full').style.display = 'none'; document.getElementById('2412.01331v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Findings paper presented at Machine Learning for Health (ML4H) symposium 2024, December 15-16, 2024, Vancouver, Canada, 9 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.15221">arXiv:2411.15221</a> <span> [<a href="https://arxiv.org/pdf/2411.15221">pdf</a>, <a href="https://arxiv.org/format/2411.15221">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Chemical Physics">physics.chem-ph</span> </div> </div> <p class="title is-5 mathjax"> Reflections from the 2024 Large Language Model (LLM) Hackathon for Applications in Materials Science and Chemistry </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zimmermann%2C+Y">Yoel Zimmermann</a>, <a href="/search/cs?searchtype=author&query=Bazgir%2C+A">Adib Bazgir</a>, <a href="/search/cs?searchtype=author&query=Afzal%2C+Z">Zartashia Afzal</a>, <a href="/search/cs?searchtype=author&query=Agbere%2C+F">Fariha Agbere</a>, <a href="/search/cs?searchtype=author&query=Ai%2C+Q">Qianxiang Ai</a>, <a href="/search/cs?searchtype=author&query=Alampara%2C+N">Nawaf Alampara</a>, <a href="/search/cs?searchtype=author&query=Al-Feghali%2C+A">Alexander Al-Feghali</a>, <a href="/search/cs?searchtype=author&query=Ansari%2C+M">Mehrad Ansari</a>, <a href="/search/cs?searchtype=author&query=Antypov%2C+D">Dmytro Antypov</a>, <a href="/search/cs?searchtype=author&query=Aswad%2C+A">Amro Aswad</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+J">Jiaru Bai</a>, <a href="/search/cs?searchtype=author&query=Baibakova%2C+V">Viktoriia Baibakova</a>, <a href="/search/cs?searchtype=author&query=Biswajeet%2C+D+D">Devi Dutta Biswajeet</a>, <a href="/search/cs?searchtype=author&query=Bitzek%2C+E">Erik Bitzek</a>, <a href="/search/cs?searchtype=author&query=Bocarsly%2C+J+D">Joshua D. Bocarsly</a>, <a href="/search/cs?searchtype=author&query=Borisova%2C+A">Anna Borisova</a>, <a href="/search/cs?searchtype=author&query=Bran%2C+A+M">Andres M Bran</a>, <a href="/search/cs?searchtype=author&query=Brinson%2C+L+C">L. Catherine Brinson</a>, <a href="/search/cs?searchtype=author&query=Calderon%2C+M+M">Marcel Moran Calderon</a>, <a href="/search/cs?searchtype=author&query=Canalicchio%2C+A">Alessandro Canalicchio</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+V">Victor Chen</a>, <a href="/search/cs?searchtype=author&query=Chiang%2C+Y">Yuan Chiang</a>, <a href="/search/cs?searchtype=author&query=Circi%2C+D">Defne Circi</a>, <a href="/search/cs?searchtype=author&query=Charmes%2C+B">Benjamin Charmes</a>, <a href="/search/cs?searchtype=author&query=Chaudhary%2C+V">Vikrant Chaudhary</a> , et al. (119 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.15221v2-abstract-short" style="display: inline;"> Here, we present the outcomes from the second Large Language Model (LLM) Hackathon for Applications in Materials Science and Chemistry, which engaged participants across global hybrid locations, resulting in 34 team submissions. The submissions spanned seven key application areas and demonstrated the diverse utility of LLMs for applications in (1) molecular and material property prediction; (2) mo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15221v2-abstract-full').style.display = 'inline'; document.getElementById('2411.15221v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.15221v2-abstract-full" style="display: none;"> Here, we present the outcomes from the second Large Language Model (LLM) Hackathon for Applications in Materials Science and Chemistry, which engaged participants across global hybrid locations, resulting in 34 team submissions. The submissions spanned seven key application areas and demonstrated the diverse utility of LLMs for applications in (1) molecular and material property prediction; (2) molecular and material design; (3) automation and novel interfaces; (4) scientific communication and education; (5) research data management and automation; (6) hypothesis generation and evaluation; and (7) knowledge extraction and reasoning from scientific literature. Each team submission is presented in a summary table with links to the code and as brief papers in the appendix. Beyond team results, we discuss the hackathon event and its hybrid format, which included physical hubs in Toronto, Montreal, San Francisco, Berlin, Lausanne, and Tokyo, alongside a global online hub to enable local and virtual collaboration. Overall, the event highlighted significant improvements in LLM capabilities since the previous year's hackathon, suggesting continued expansion of LLMs for applications in materials science and chemistry research. These outcomes demonstrate the dual utility of LLMs as both multipurpose models for diverse machine learning tasks and platforms for rapid prototyping custom applications in scientific research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15221v2-abstract-full').style.display = 'none'; document.getElementById('2411.15221v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Updating author information, the submission remains largely unchanged. 98 pages total</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.22360">arXiv:2410.22360</a> <span> [<a href="https://arxiv.org/pdf/2410.22360">pdf</a>, <a href="https://arxiv.org/format/2410.22360">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> ArxivDIGESTables: Synthesizing Scientific Literature into Tables using Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Newman%2C+B">Benjamin Newman</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+Y">Yoonjoo Lee</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Aakanksha Naik</a>, <a href="/search/cs?searchtype=author&query=Siangliulue%2C+P">Pao Siangliulue</a>, <a href="/search/cs?searchtype=author&query=Fok%2C+R">Raymond Fok</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+J">Juho Kim</a>, <a href="/search/cs?searchtype=author&query=Weld%2C+D+S">Daniel S. Weld</a>, <a href="/search/cs?searchtype=author&query=Chang%2C+J+C">Joseph Chee Chang</a>, <a href="/search/cs?searchtype=author&query=Lo%2C+K">Kyle Lo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.22360v1-abstract-short" style="display: inline;"> When conducting literature reviews, scientists often create literature review tables - tables whose rows are publications and whose columns constitute a schema, a set of aspects used to compare and contrast the papers. Can we automatically generate these tables using language models (LMs)? In this work, we introduce a framework that leverages LMs to perform this task by decomposing it into separat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22360v1-abstract-full').style.display = 'inline'; document.getElementById('2410.22360v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.22360v1-abstract-full" style="display: none;"> When conducting literature reviews, scientists often create literature review tables - tables whose rows are publications and whose columns constitute a schema, a set of aspects used to compare and contrast the papers. Can we automatically generate these tables using language models (LMs)? In this work, we introduce a framework that leverages LMs to perform this task by decomposing it into separate schema and value generation steps. To enable experimentation, we address two main challenges: First, we overcome a lack of high-quality datasets to benchmark table generation by curating and releasing arxivDIGESTables, a new dataset of 2,228 literature review tables extracted from ArXiv papers that synthesize a total of 7,542 research papers. Second, to support scalable evaluation of model generations against human-authored reference tables, we develop DecontextEval, an automatic evaluation method that aligns elements of tables with the same underlying aspects despite differing surface forms. Given these tools, we evaluate LMs' abilities to reconstruct reference tables, finding this task benefits from additional context to ground the generation (e.g. table captions, in-text references). Finally, through a human evaluation study we find that even when LMs fail to fully reconstruct a reference table, their generated novel aspects can still be useful. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22360v1-abstract-full').style.display = 'none'; document.getElementById('2410.22360v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP 2024, 21 pages, 8 figures, 10 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.05811">arXiv:2410.05811</a> <span> [<a href="https://arxiv.org/pdf/2410.05811">pdf</a>, <a href="https://arxiv.org/ps/2410.05811">ps</a>, <a href="https://arxiv.org/format/2410.05811">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation">stat.CO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Instrumentation and Methods for Astrophysics">astro-ph.IM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Mathematical Software">cs.MS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Probability">math.PR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.21105/joss.06906">10.21105/joss.06906 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> lintsampler: Easy random sampling via linear interpolation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Naik%2C+A+P">Aneesh P. Naik</a>, <a href="/search/cs?searchtype=author&query=Petersen%2C+M+S">Michael S. Petersen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.05811v1-abstract-short" style="display: inline;"> 'lintsampler' provides a Python implementation of a technique we term 'linear interpolant sampling': an algorithm to efficiently draw pseudo-random samples from an arbitrary probability density function (PDF). First, the PDF is evaluated on a grid-like structure. Then, it is assumed that the PDF can be approximated between grid vertices by the (multidimensional) linear interpolant. With this assum… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05811v1-abstract-full').style.display = 'inline'; document.getElementById('2410.05811v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.05811v1-abstract-full" style="display: none;"> 'lintsampler' provides a Python implementation of a technique we term 'linear interpolant sampling': an algorithm to efficiently draw pseudo-random samples from an arbitrary probability density function (PDF). First, the PDF is evaluated on a grid-like structure. Then, it is assumed that the PDF can be approximated between grid vertices by the (multidimensional) linear interpolant. With this assumption, random samples can be efficiently drawn via inverse transform sampling. lintsampler is primarily written with 'numpy', drawing some additional functionality from 'scipy'. Under the most basic usage of lintsampler, the user provides a Python function defining the target PDF and some parameters describing a grid-like structure to the 'LintSampler' class, and is then able to draw samples via the 'sample' method. Additionally, there is functionality for the user to set the random seed, employ quasi-Monte Carlo sampling, or sample within a premade grid ('DensityGrid') or tree ('DensityTree') structure. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05811v1-abstract-full').style.display = 'none'; document.getElementById('2410.05811v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by Journal of Open Source Software. Describes code repository at https://github.com/aneeshnaik/lintsampler</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Journal of Open Source Software, 2024, 9(102), 6906 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.03348">arXiv:2410.03348</a> <span> [<a href="https://arxiv.org/pdf/2410.03348">pdf</a>, <a href="https://arxiv.org/format/2410.03348">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Dolphin: A Programmable Framework for Scalable Neurosymbolic Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Naik%2C+A">Aaditya Naik</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jason Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Claire Wang</a>, <a href="/search/cs?searchtype=author&query=Sethi%2C+A">Amish Sethi</a>, <a href="/search/cs?searchtype=author&query=Dutta%2C+S">Saikat Dutta</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+M">Mayur Naik</a>, <a href="/search/cs?searchtype=author&query=Wong%2C+E">Eric Wong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.03348v3-abstract-short" style="display: inline;"> Neurosymbolic learning enables the integration of symbolic reasoning with deep learning but faces significant challenges in scaling to complex symbolic programs, large datasets, or both. We introduce Dolphin, a framework that tackles these challenges by supporting neurosymbolic programs in Python, executing complex symbolic reasoning on the CPU while vectorizing probabilistic computations and grad… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03348v3-abstract-full').style.display = 'inline'; document.getElementById('2410.03348v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.03348v3-abstract-full" style="display: none;"> Neurosymbolic learning enables the integration of symbolic reasoning with deep learning but faces significant challenges in scaling to complex symbolic programs, large datasets, or both. We introduce Dolphin, a framework that tackles these challenges by supporting neurosymbolic programs in Python, executing complex symbolic reasoning on the CPU while vectorizing probabilistic computations and gradient propagation on the GPU. Across 13 benchmarks spanning tasks over text, image, and video data, with symbolic reasoning features like recursion and black-box functions, Dolphin converges to state-of-the-art accuracies on the more complex benchmarks while existing frameworks such as Scallop, ISED, and IndeCateR+ fail to converge within the time limit. On simpler benchmarks, Dolphin matches their performance, while achieving these results 1.71x to 62x faster than the baselines. Overall, Dolphin advances the scalability of neurosymbolic frameworks, achieving state-of-the-art efficiency and convergence on difficult benchmarks where existing frameworks struggle. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03348v3-abstract-full').style.display = 'none'; document.getElementById('2410.03348v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.19801">arXiv:2409.19801</a> <span> [<a href="https://arxiv.org/pdf/2409.19801">pdf</a>, <a href="https://arxiv.org/format/2409.19801">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> CRScore: Grounding Automated Evaluation of Code Review Comments in Code Claims and Smells </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Naik%2C+A">Atharva Naik</a>, <a href="/search/cs?searchtype=author&query=Alenius%2C+M">Marcus Alenius</a>, <a href="/search/cs?searchtype=author&query=Fried%2C+D">Daniel Fried</a>, <a href="/search/cs?searchtype=author&query=Rose%2C+C">Carolyn Rose</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.19801v1-abstract-short" style="display: inline;"> The task of automated code review has recently gained a lot of attention from the machine learning community. However, current review comment evaluation metrics rely on comparisons with a human-written reference for a given code change (also called a diff), even though code review is a one-to-many problem like generation and summarization with many "valid reviews" for a diff. To tackle these issue… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19801v1-abstract-full').style.display = 'inline'; document.getElementById('2409.19801v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.19801v1-abstract-full" style="display: none;"> The task of automated code review has recently gained a lot of attention from the machine learning community. However, current review comment evaluation metrics rely on comparisons with a human-written reference for a given code change (also called a diff), even though code review is a one-to-many problem like generation and summarization with many "valid reviews" for a diff. To tackle these issues we develop a CRScore - a reference-free metric to measure dimensions of review quality like conciseness, comprehensiveness, and relevance. We design CRScore to evaluate reviews in a way that is grounded in claims and potential issues detected in the code by LLMs and static analyzers. We demonstrate that CRScore can produce valid, fine-grained scores of review quality that have the greatest alignment with human judgment (0.54 Spearman correlation) and are more sensitive than reference-based metrics. We also release a corpus of 2.6k human-annotated review quality scores for machine-generated and GitHub review comments to support the development of automated metrics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19801v1-abstract-full').style.display = 'none'; document.getElementById('2409.19801v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.16148">arXiv:2407.16148</a> <span> [<a href="https://arxiv.org/pdf/2407.16148">pdf</a>, <a href="https://arxiv.org/format/2407.16148">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> CHIME: LLM-Assisted Hierarchical Organization of Scientific Studies for Literature Review Support </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hsu%2C+C">Chao-Chun Hsu</a>, <a href="/search/cs?searchtype=author&query=Bransom%2C+E">Erin Bransom</a>, <a href="/search/cs?searchtype=author&query=Sparks%2C+J">Jenna Sparks</a>, <a href="/search/cs?searchtype=author&query=Kuehl%2C+B">Bailey Kuehl</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+C">Chenhao Tan</a>, <a href="/search/cs?searchtype=author&query=Wadden%2C+D">David Wadden</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L+L">Lucy Lu Wang</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Aakanksha Naik</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.16148v1-abstract-short" style="display: inline;"> Literature review requires researchers to synthesize a large amount of information and is increasingly challenging as the scientific literature expands. In this work, we investigate the potential of LLMs for producing hierarchical organizations of scientific studies to assist researchers with literature review. We define hierarchical organizations as tree structures where nodes refer to topical ca… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.16148v1-abstract-full').style.display = 'inline'; document.getElementById('2407.16148v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.16148v1-abstract-full" style="display: none;"> Literature review requires researchers to synthesize a large amount of information and is increasingly challenging as the scientific literature expands. In this work, we investigate the potential of LLMs for producing hierarchical organizations of scientific studies to assist researchers with literature review. We define hierarchical organizations as tree structures where nodes refer to topical categories and every node is linked to the studies assigned to that category. Our naive LLM-based pipeline for hierarchy generation from a set of studies produces promising yet imperfect hierarchies, motivating us to collect CHIME, an expert-curated dataset for this task focused on biomedicine. Given the challenging and time-consuming nature of building hierarchies from scratch, we use a human-in-the-loop process in which experts correct errors (both links between categories and study assignment) in LLM-generated hierarchies. CHIME contains 2,174 LLM-generated hierarchies covering 472 topics, and expert-corrected hierarchies for a subset of 100 topics. Expert corrections allow us to quantify LLM performance, and we find that while they are quite good at generating and organizing categories, their assignment of studies to categories could be improved. We attempt to train a corrector model with human feedback which improves study assignment by 12.6 F1 points. We release our dataset and models to encourage research on developing better assistive tools for literature review. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.16148v1-abstract-full').style.display = 'none'; document.getElementById('2407.16148v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">2024 ACL Findings</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.18534">arXiv:2406.18534</a> <span> [<a href="https://arxiv.org/pdf/2406.18534">pdf</a>, <a href="https://arxiv.org/format/2406.18534">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Towards Compositionality in Concept Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Stein%2C+A">Adam Stein</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Aaditya Naik</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yinjun Wu</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+M">Mayur Naik</a>, <a href="/search/cs?searchtype=author&query=Wong%2C+E">Eric Wong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.18534v1-abstract-short" style="display: inline;"> Concept-based interpretability methods offer a lens into the internals of foundation models by decomposing their embeddings into high-level concepts. These concept representations are most useful when they are compositional, meaning that the individual concepts compose to explain the full sample. We show that existing unsupervised concept extraction methods find concepts which are not compositiona… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.18534v1-abstract-full').style.display = 'inline'; document.getElementById('2406.18534v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.18534v1-abstract-full" style="display: none;"> Concept-based interpretability methods offer a lens into the internals of foundation models by decomposing their embeddings into high-level concepts. These concept representations are most useful when they are compositional, meaning that the individual concepts compose to explain the full sample. We show that existing unsupervised concept extraction methods find concepts which are not compositional. To automatically discover compositional concept representations, we identify two salient properties of such representations, and propose Compositional Concept Extraction (CCE) for finding concepts which obey these properties. We evaluate CCE on five different datasets over image and text data. Our evaluation shows that CCE finds more compositional concept representations than baselines and yields better accuracy on four downstream classification tasks. Code and data are available at https://github.com/adaminsky/compositional_concepts . <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.18534v1-abstract-full').style.display = 'none'; document.getElementById('2406.18534v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at ICML 2024. 26 pages, 10 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.12725">arXiv:2406.12725</a> <span> [<a href="https://arxiv.org/pdf/2406.12725">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Can Large Language Models Code Like a Linguist?: A Case Study in Low Resource Sound Law Induction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Naik%2C+A">Atharva Naik</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+K">Kexun Zhang</a>, <a href="/search/cs?searchtype=author&query=Robinson%2C+N">Nathaniel Robinson</a>, <a href="/search/cs?searchtype=author&query=Mysore%2C+A">Aravind Mysore</a>, <a href="/search/cs?searchtype=author&query=Marr%2C+C">Clayton Marr</a>, <a href="/search/cs?searchtype=author&query=Byrnes%2C+H+S+R">Hong Sng Rebecca Byrnes</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+A">Anna Cai</a>, <a href="/search/cs?searchtype=author&query=Chang%2C+K">Kalvin Chang</a>, <a href="/search/cs?searchtype=author&query=Mortensen%2C+D">David Mortensen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.12725v1-abstract-short" style="display: inline;"> Historical linguists have long written a kind of incompletely formalized ''program'' that converts reconstructed words in an ancestor language into words in one of its attested descendants that consist of a series of ordered string rewrite functions (called sound laws). They do this by observing pairs of words in the reconstructed language (protoforms) and the descendent language (reflexes) and co… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.12725v1-abstract-full').style.display = 'inline'; document.getElementById('2406.12725v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.12725v1-abstract-full" style="display: none;"> Historical linguists have long written a kind of incompletely formalized ''program'' that converts reconstructed words in an ancestor language into words in one of its attested descendants that consist of a series of ordered string rewrite functions (called sound laws). They do this by observing pairs of words in the reconstructed language (protoforms) and the descendent language (reflexes) and constructing a program that transforms protoforms into reflexes. However, writing these programs is error-prone and time-consuming. Prior work has successfully scaffolded this process computationally, but fewer researchers have tackled Sound Law Induction (SLI), which we approach in this paper by casting it as Programming by Examples. We propose a language-agnostic solution that utilizes the programming ability of Large Language Models (LLMs) by generating Python sound law programs from sound change examples. We evaluate the effectiveness of our approach for various LLMs, propose effective methods to generate additional language-agnostic synthetic data to fine-tune LLMs for SLI, and compare our method with existing automated SLI methods showing that while LLMs lag behind them they can complement some of their weaknesses. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.12725v1-abstract-full').style.display = 'none'; document.getElementById('2406.12725v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.07835">arXiv:2406.07835</a> <span> [<a href="https://arxiv.org/pdf/2406.07835">pdf</a>, <a href="https://arxiv.org/format/2406.07835">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> SciRIFF: A Resource to Enhance Language Model Instruction-Following over Scientific Literature </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wadden%2C+D">David Wadden</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+K">Kejian Shi</a>, <a href="/search/cs?searchtype=author&query=Morrison%2C+J">Jacob Morrison</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Aakanksha Naik</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+S">Shruti Singh</a>, <a href="/search/cs?searchtype=author&query=Barzilay%2C+N">Nitzan Barzilay</a>, <a href="/search/cs?searchtype=author&query=Lo%2C+K">Kyle Lo</a>, <a href="/search/cs?searchtype=author&query=Hope%2C+T">Tom Hope</a>, <a href="/search/cs?searchtype=author&query=Soldaini%2C+L">Luca Soldaini</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+S+Z">Shannon Zejiang Shen</a>, <a href="/search/cs?searchtype=author&query=Downey%2C+D">Doug Downey</a>, <a href="/search/cs?searchtype=author&query=Hajishirzi%2C+H">Hannaneh Hajishirzi</a>, <a href="/search/cs?searchtype=author&query=Cohan%2C+A">Arman Cohan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.07835v3-abstract-short" style="display: inline;"> We present SciRIFF (Scientific Resource for Instruction-Following and Finetuning), a dataset of 137K instruction-following demonstrations for 54 tasks covering five essential scientific literature understanding capabilities: information extraction, summarization, question answering, claim verification, and classification. SciRIFF demonstrations are notable for their long input contexts, detailed t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.07835v3-abstract-full').style.display = 'inline'; document.getElementById('2406.07835v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.07835v3-abstract-full" style="display: none;"> We present SciRIFF (Scientific Resource for Instruction-Following and Finetuning), a dataset of 137K instruction-following demonstrations for 54 tasks covering five essential scientific literature understanding capabilities: information extraction, summarization, question answering, claim verification, and classification. SciRIFF demonstrations are notable for their long input contexts, detailed task specifications, and complex structured outputs. While instruction-following resources are available in specific domains such as clinical medicine and chemistry, SciRIFF is the first dataset focused on extracting and synthesizing information from research literature across a wide range of scientific fields. To demonstrate the utility of SciRIFF, we develop a sample-efficient strategy to adapt a general instruction-following model for science by performing additional finetuning on a mix of general-domain and SciRIFF demonstrations. In evaluations on nine held-out scientific tasks, our model -- called SciTulu -- improves over a strong LLM baseline by 28.1% and 6.5% at the 7B and 70B scales respectively, while maintaining general instruction-following performance within 2% of the baseline. We are optimistic that SciRIFF will facilitate the development and evaluation of LLMs to help researchers navigate the ever-growing body of scientific literature. We release our dataset, model checkpoints, and data processing and evaluation code to enable further research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.07835v3-abstract-full').style.display = 'none'; document.getElementById('2406.07835v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to NeurIPS Datasets and Benchmarks 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.20501">arXiv:2405.20501</a> <span> [<a href="https://arxiv.org/pdf/2405.20501">pdf</a>, <a href="https://arxiv.org/format/2405.20501">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.5555/3545946.3598805">10.5555/3545946.3598805 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> ShelfHelp: Empowering Humans to Perform Vision-Independent Manipulation Tasks with a Socially Assistive Robotic Cane </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Agrawal%2C+S">Shivendra Agrawal</a>, <a href="/search/cs?searchtype=author&query=Nayak%2C+S">Suresh Nayak</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Ashutosh Naik</a>, <a href="/search/cs?searchtype=author&query=Hayes%2C+B">Bradley Hayes</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.20501v1-abstract-short" style="display: inline;"> The ability to shop independently, especially in grocery stores, is important for maintaining a high quality of life. This can be particularly challenging for people with visual impairments (PVI). Stores carry thousands of products, with approximately 30,000 new products introduced each year in the US market alone, presenting a challenge even for modern computer vision solutions. Through this work… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.20501v1-abstract-full').style.display = 'inline'; document.getElementById('2405.20501v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.20501v1-abstract-full" style="display: none;"> The ability to shop independently, especially in grocery stores, is important for maintaining a high quality of life. This can be particularly challenging for people with visual impairments (PVI). Stores carry thousands of products, with approximately 30,000 new products introduced each year in the US market alone, presenting a challenge even for modern computer vision solutions. Through this work, we present a proof-of-concept socially assistive robotic system we call ShelfHelp, and propose novel technical solutions for enhancing instrumented canes traditionally meant for navigation tasks with additional capability within the domain of shopping. ShelfHelp includes a novel visual product locator algorithm designed for use in grocery stores and a novel planner that autonomously issues verbal manipulation guidance commands to guide the user during product retrieval. Through a human subjects study, we show the system's success in locating and providing effective manipulation guidance to retrieve desired products with novice users. We compare two autonomous verbal guidance modes achieving comparable performance to a human assistance baseline and present encouraging findings that validate our system's efficiency and effectiveness and through positive subjective metrics including competence, intelligence, and ease of use. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.20501v1-abstract-full').style.display = 'none'; document.getElementById('2405.20501v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 14 figures and charts</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> In AAMAS (pp. 1514-1523) 2023 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.09999">arXiv:2405.09999</a> <span> [<a href="https://arxiv.org/pdf/2405.09999">pdf</a>, <a href="https://arxiv.org/format/2405.09999">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Reward Centering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Naik%2C+A">Abhishek Naik</a>, <a href="/search/cs?searchtype=author&query=Wan%2C+Y">Yi Wan</a>, <a href="/search/cs?searchtype=author&query=Tomar%2C+M">Manan Tomar</a>, <a href="/search/cs?searchtype=author&query=Sutton%2C+R+S">Richard S. Sutton</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.09999v2-abstract-short" style="display: inline;"> We show that discounted methods for solving continuing reinforcement learning problems can perform significantly better if they center their rewards by subtracting out the rewards' empirical average. The improvement is substantial at commonly used discount factors and increases further as the discount factor approaches one. In addition, we show that if a problem's rewards are shifted by a constant… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.09999v2-abstract-full').style.display = 'inline'; document.getElementById('2405.09999v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.09999v2-abstract-full" style="display: none;"> We show that discounted methods for solving continuing reinforcement learning problems can perform significantly better if they center their rewards by subtracting out the rewards' empirical average. The improvement is substantial at commonly used discount factors and increases further as the discount factor approaches one. In addition, we show that if a problem's rewards are shifted by a constant, then standard methods perform much worse, whereas methods with reward centering are unaffected. Estimating the average reward is straightforward in the on-policy setting; we propose a slightly more sophisticated method for the off-policy setting. Reward centering is a general idea, so we expect almost every reinforcement-learning algorithm to benefit by the addition of reward centering. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.09999v2-abstract-full').style.display = 'none'; document.getElementById('2405.09999v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">In Proceedings of RLC 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.01580">arXiv:2405.01580</a> <span> [<a href="https://arxiv.org/pdf/2405.01580">pdf</a>, <a href="https://arxiv.org/format/2405.01580">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> On the Limitations of Embedding Based Methods for Measuring Functional Correctness for Code Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Naik%2C+A">Atharva Naik</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.01580v1-abstract-short" style="display: inline;"> The task of code generation from natural language (NL2Code) has become extremely popular, especially with the advent of Large Language Models (LLMs). However, efforts to quantify and track this progress have suffered due to a lack of reliable metrics for functional correctness. While popular benchmarks like HumanEval have test cases to enable reliable evaluation of correctness, it is time-consumin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.01580v1-abstract-full').style.display = 'inline'; document.getElementById('2405.01580v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.01580v1-abstract-full" style="display: none;"> The task of code generation from natural language (NL2Code) has become extremely popular, especially with the advent of Large Language Models (LLMs). However, efforts to quantify and track this progress have suffered due to a lack of reliable metrics for functional correctness. While popular benchmarks like HumanEval have test cases to enable reliable evaluation of correctness, it is time-consuming and requires human effort to collect test cases. As an alternative several reference-based evaluation metrics have been proposed, with embedding-based metrics like CodeBERTScore being touted as having a high correlation with human preferences and functional correctness. In our work, we analyze the ability of embedding-based metrics like CodeBERTScore to measure functional correctness and other helpful constructs like editing effort by analyzing outputs of ten models over two popular code generation benchmarks. Our results show that while they have a weak correlation with functional correctness (0.16), they are strongly correlated (0.72) with editing effort. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.01580v1-abstract-full').style.display = 'none'; document.getElementById('2405.01580v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.18262">arXiv:2404.18262</a> <span> [<a href="https://arxiv.org/pdf/2404.18262">pdf</a>, <a href="https://arxiv.org/format/2404.18262">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Generating Situated Reflection Triggers about Alternative Solution Paths: A Case Study of Generative AI for Computer-Supported Collaborative Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Naik%2C+A">Atharva Naik</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+J+R">Jessica Ruhan Yin</a>, <a href="/search/cs?searchtype=author&query=Kamath%2C+A">Anusha Kamath</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+Q">Qianou Ma</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+S+T">Sherry Tongshuang Wu</a>, <a href="/search/cs?searchtype=author&query=Murray%2C+C">Charles Murray</a>, <a href="/search/cs?searchtype=author&query=Bogart%2C+C">Christopher Bogart</a>, <a href="/search/cs?searchtype=author&query=Sakr%2C+M">Majd Sakr</a>, <a href="/search/cs?searchtype=author&query=Rose%2C+C+P">Carolyn P. Rose</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.18262v1-abstract-short" style="display: inline;"> An advantage of Large Language Models (LLMs) is their contextualization capability - providing different responses based on student inputs like solution strategy or prior discussion, to potentially better engage students than standard feedback. We present a design and evaluation of a proof-of-concept LLM application to offer students dynamic and contextualized feedback. Specifically, we augment an… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.18262v1-abstract-full').style.display = 'inline'; document.getElementById('2404.18262v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.18262v1-abstract-full" style="display: none;"> An advantage of Large Language Models (LLMs) is their contextualization capability - providing different responses based on student inputs like solution strategy or prior discussion, to potentially better engage students than standard feedback. We present a design and evaluation of a proof-of-concept LLM application to offer students dynamic and contextualized feedback. Specifically, we augment an Online Programming Exercise bot for a college-level Cloud Computing course with ChatGPT, which offers students contextualized reflection triggers during a collaborative query optimization task in database design. We demonstrate that LLMs can be used to generate highly situated reflection triggers that incorporate details of the collaborative discussion happening in context. We discuss in depth the exploration of the design space of the triggers and their correspondence with the learning objectives as well as the impact on student learning in a pilot study with 34 students. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.18262v1-abstract-full').style.display = 'none'; document.getElementById('2404.18262v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.10100">arXiv:2404.10100</a> <span> [<a href="https://arxiv.org/pdf/2404.10100">pdf</a>, <a href="https://arxiv.org/format/2404.10100">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TSE.2024.3428972">10.1109/TSE.2024.3428972 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> LLM-Based Test-Driven Interactive Code Generation: User Study and Empirical Evaluation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fakhoury%2C+S">Sarah Fakhoury</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Aaditya Naik</a>, <a href="/search/cs?searchtype=author&query=Sakkas%2C+G">Georgios Sakkas</a>, <a href="/search/cs?searchtype=author&query=Chakraborty%2C+S">Saikat Chakraborty</a>, <a href="/search/cs?searchtype=author&query=Lahiri%2C+S+K">Shuvendu K. Lahiri</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.10100v2-abstract-short" style="display: inline;"> Large language models (LLMs) have shown great potential in automating significant aspects of coding by producing natural code from informal natural language (NL) intent. However, given NL is informal, it does not lend easily to checking that the generated code correctly satisfies the user intent. In this paper, we propose a novel interactive workflow TiCoder for guided intent clarification (i.e.,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.10100v2-abstract-full').style.display = 'inline'; document.getElementById('2404.10100v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.10100v2-abstract-full" style="display: none;"> Large language models (LLMs) have shown great potential in automating significant aspects of coding by producing natural code from informal natural language (NL) intent. However, given NL is informal, it does not lend easily to checking that the generated code correctly satisfies the user intent. In this paper, we propose a novel interactive workflow TiCoder for guided intent clarification (i.e., partial formalization) through tests to support the generation of more accurate code suggestions. Through a mixed methods user study with 15 programmers, we present an empirical evaluation of the effectiveness of the workflow to improve code generation accuracy. We find that participants using the proposed workflow are significantly more likely to correctly evaluate AI generated code, and report significantly less task-induced cognitive load. Furthermore, we test the potential of the workflow at scale with four different state-of-the-art LLMs on two python datasets, using an idealized proxy for a user feedback. We observe an average absolute improvement of 45.97% in the pass@1 code generation accuracy for both datasets and across all LLMs within 5 user interactions, in addition to the automatic generation of accompanying unit tests. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.10100v2-abstract-full').style.display = 'none'; document.getElementById('2404.10100v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">IEEE Transactions on Software Engineering, vol. 50, no. 09, pp. 2254-2268, 2024</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> in IEEE Transactions on Software Engineering, vol. 50, no. 09, pp. 2254-2268, 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.09047">arXiv:2404.09047</a> <span> [<a href="https://arxiv.org/pdf/2404.09047">pdf</a>, <a href="https://arxiv.org/format/2404.09047">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Multilingual Evaluation of Semantic Textual Relatedness </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Endait%2C+S">Sharvi Endait</a>, <a href="/search/cs?searchtype=author&query=Sonavane%2C+S">Srushti Sonavane</a>, <a href="/search/cs?searchtype=author&query=Sinare%2C+R">Ridhima Sinare</a>, <a href="/search/cs?searchtype=author&query=Rohera%2C+P">Pritika Rohera</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Advait Naik</a>, <a href="/search/cs?searchtype=author&query=Kadam%2C+D">Dipali Kadam</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.09047v1-abstract-short" style="display: inline;"> The explosive growth of online content demands robust Natural Language Processing (NLP) techniques that can capture nuanced meanings and cultural context across diverse languages. Semantic Textual Relatedness (STR) goes beyond superficial word overlap, considering linguistic elements and non-linguistic factors like topic, sentiment, and perspective. Despite its pivotal role, prior NLP research has… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.09047v1-abstract-full').style.display = 'inline'; document.getElementById('2404.09047v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.09047v1-abstract-full" style="display: none;"> The explosive growth of online content demands robust Natural Language Processing (NLP) techniques that can capture nuanced meanings and cultural context across diverse languages. Semantic Textual Relatedness (STR) goes beyond superficial word overlap, considering linguistic elements and non-linguistic factors like topic, sentiment, and perspective. Despite its pivotal role, prior NLP research has predominantly focused on English, limiting its applicability across languages. Addressing this gap, our paper dives into capturing deeper connections between sentences beyond simple word overlap. Going beyond English-centric NLP research, we explore STR in Marathi, Hindi, Spanish, and English, unlocking the potential for information retrieval, machine translation, and more. Leveraging the SemEval-2024 shared task, we explore various language models across three learning paradigms: supervised, unsupervised, and cross-lingual. Our comprehensive methodology gains promising results, demonstrating the effectiveness of our approach. This work aims to not only showcase our achievements but also inspire further research in multilingual STR, particularly for low-resourced languages. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.09047v1-abstract-full').style.display = 'none'; document.getElementById('2404.09047v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.06611">arXiv:2404.06611</a> <span> [<a href="https://arxiv.org/pdf/2404.06611">pdf</a>, <a href="https://arxiv.org/format/2404.06611">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> Modeling social interaction dynamics using temporal graph networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+J+T">J. Taery Kim</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Archit Naik</a>, <a href="/search/cs?searchtype=author&query=Jayarathne%2C+I">Isuru Jayarathne</a>, <a href="/search/cs?searchtype=author&query=Ha%2C+S">Sehoon Ha</a>, <a href="/search/cs?searchtype=author&query=Chew%2C+J+Y">Jouh Yeong Chew</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.06611v1-abstract-short" style="display: inline;"> Integrating intelligent systems, such as robots, into dynamic group settings poses challenges due to the mutual influence of human behaviors and internal states. A robust representation of social interaction dynamics is essential for effective human-robot collaboration. Existing approaches often narrow their focus to facial expressions or speech, overlooking the broader context. We propose employi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.06611v1-abstract-full').style.display = 'inline'; document.getElementById('2404.06611v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.06611v1-abstract-full" style="display: none;"> Integrating intelligent systems, such as robots, into dynamic group settings poses challenges due to the mutual influence of human behaviors and internal states. A robust representation of social interaction dynamics is essential for effective human-robot collaboration. Existing approaches often narrow their focus to facial expressions or speech, overlooking the broader context. We propose employing an adapted Temporal Graph Networks to comprehensively represent social interaction dynamics while enabling its practical implementation. Our method incorporates temporal multi-modal behavioral data including gaze interaction, voice activity and environmental context. This representation of social interaction dynamics is trained as a link prediction problem using annotated gaze interaction data. The F1-score outperformed the baseline model by 37.0%. This improvement is consistent for a secondary task of next speaker prediction which achieves an improvement of 29.0%. Our contributions are two-fold, including a model to representing social interaction dynamics which can be used for many downstream human-robot interaction tasks like human state inference and next speaker prediction. More importantly, this is achieved using a more concise yet efficient message passing method, significantly reducing it from 768 to 14 elements, while outperforming the baseline model. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.06611v1-abstract-full').style.display = 'none'; document.getElementById('2404.06611v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 4 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> 33rd IEEE International Conference on Robot & Human Interactive Communication (RO-MAN 2024) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.00152">arXiv:2404.00152</a> <span> [<a href="https://arxiv.org/pdf/2404.00152">pdf</a>, <a href="https://arxiv.org/format/2404.00152">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> On-the-fly Definition Augmentation of LLMs for Biomedical NER </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Munnangi%2C+M">Monica Munnangi</a>, <a href="/search/cs?searchtype=author&query=Feldman%2C+S">Sergey Feldman</a>, <a href="/search/cs?searchtype=author&query=Wallace%2C+B+C">Byron C Wallace</a>, <a href="/search/cs?searchtype=author&query=Amir%2C+S">Silvio Amir</a>, <a href="/search/cs?searchtype=author&query=Hope%2C+T">Tom Hope</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Aakanksha Naik</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.00152v2-abstract-short" style="display: inline;"> Despite their general capabilities, LLMs still struggle on biomedical NER tasks, which are difficult due to the presence of specialized terminology and lack of training data. In this work we set out to improve LLM performance on biomedical NER in limited data settings via a new knowledge augmentation approach which incorporates definitions of relevant concepts on-the-fly. During this process, to p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.00152v2-abstract-full').style.display = 'inline'; document.getElementById('2404.00152v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.00152v2-abstract-full" style="display: none;"> Despite their general capabilities, LLMs still struggle on biomedical NER tasks, which are difficult due to the presence of specialized terminology and lack of training data. In this work we set out to improve LLM performance on biomedical NER in limited data settings via a new knowledge augmentation approach which incorporates definitions of relevant concepts on-the-fly. During this process, to provide a test bed for knowledge augmentation, we perform a comprehensive exploration of prompting strategies. Our experiments show that definition augmentation is useful for both open source and closed LLMs. For example, it leads to a relative improvement of 15\% (on average) in GPT-4 performance (F1) across all (six) of our test datasets. We conduct extensive ablations and analyses to demonstrate that our performance improvements stem from adding relevant definitional knowledge. We find that careful prompting strategies also improve LLM performance, allowing them to outperform fine-tuned language models in few-shot settings. To facilitate future research in this direction, we release our code at https://github.com/allenai/beacon. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.00152v2-abstract-full').style.display = 'none'; document.getElementById('2404.00152v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear at NAACL 2024 (Main)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.00838">arXiv:2402.00838</a> <span> [<a href="https://arxiv.org/pdf/2402.00838">pdf</a>, <a href="https://arxiv.org/format/2402.00838">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> OLMo: Accelerating the Science of Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Groeneveld%2C+D">Dirk Groeneveld</a>, <a href="/search/cs?searchtype=author&query=Beltagy%2C+I">Iz Beltagy</a>, <a href="/search/cs?searchtype=author&query=Walsh%2C+P">Pete Walsh</a>, <a href="/search/cs?searchtype=author&query=Bhagia%2C+A">Akshita Bhagia</a>, <a href="/search/cs?searchtype=author&query=Kinney%2C+R">Rodney Kinney</a>, <a href="/search/cs?searchtype=author&query=Tafjord%2C+O">Oyvind Tafjord</a>, <a href="/search/cs?searchtype=author&query=Jha%2C+A+H">Ananya Harsh Jha</a>, <a href="/search/cs?searchtype=author&query=Ivison%2C+H">Hamish Ivison</a>, <a href="/search/cs?searchtype=author&query=Magnusson%2C+I">Ian Magnusson</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yizhong Wang</a>, <a href="/search/cs?searchtype=author&query=Arora%2C+S">Shane Arora</a>, <a href="/search/cs?searchtype=author&query=Atkinson%2C+D">David Atkinson</a>, <a href="/search/cs?searchtype=author&query=Authur%2C+R">Russell Authur</a>, <a href="/search/cs?searchtype=author&query=Chandu%2C+K+R">Khyathi Raghavi Chandu</a>, <a href="/search/cs?searchtype=author&query=Cohan%2C+A">Arman Cohan</a>, <a href="/search/cs?searchtype=author&query=Dumas%2C+J">Jennifer Dumas</a>, <a href="/search/cs?searchtype=author&query=Elazar%2C+Y">Yanai Elazar</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+Y">Yuling Gu</a>, <a href="/search/cs?searchtype=author&query=Hessel%2C+J">Jack Hessel</a>, <a href="/search/cs?searchtype=author&query=Khot%2C+T">Tushar Khot</a>, <a href="/search/cs?searchtype=author&query=Merrill%2C+W">William Merrill</a>, <a href="/search/cs?searchtype=author&query=Morrison%2C+J">Jacob Morrison</a>, <a href="/search/cs?searchtype=author&query=Muennighoff%2C+N">Niklas Muennighoff</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Aakanksha Naik</a>, <a href="/search/cs?searchtype=author&query=Nam%2C+C">Crystal Nam</a> , et al. (18 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.00838v4-abstract-short" style="display: inline;"> Language models (LMs) have become ubiquitous in both NLP research and in commercial product offerings. As their commercial importance has surged, the most powerful models have become closed off, gated behind proprietary interfaces, with important details of their training data, architectures, and development undisclosed. Given the importance of these details in scientifically studying these models… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.00838v4-abstract-full').style.display = 'inline'; document.getElementById('2402.00838v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.00838v4-abstract-full" style="display: none;"> Language models (LMs) have become ubiquitous in both NLP research and in commercial product offerings. As their commercial importance has surged, the most powerful models have become closed off, gated behind proprietary interfaces, with important details of their training data, architectures, and development undisclosed. Given the importance of these details in scientifically studying these models, including their biases and potential risks, we believe it is essential for the research community to have access to powerful, truly open LMs. To this end, we have built OLMo, a competitive, truly Open Language Model, to enable the scientific study of language models. Unlike most prior efforts that have only released model weights and inference code, we release OLMo alongside open training data and training and evaluation code. We hope this release will empower the open research community and inspire a new wave of innovation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.00838v4-abstract-full').style.display = 'none'; document.getElementById('2402.00838v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.00159">arXiv:2402.00159</a> <span> [<a href="https://arxiv.org/pdf/2402.00159">pdf</a>, <a href="https://arxiv.org/format/2402.00159">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Dolma: an Open Corpus of Three Trillion Tokens for Language Model Pretraining Research </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Soldaini%2C+L">Luca Soldaini</a>, <a href="/search/cs?searchtype=author&query=Kinney%2C+R">Rodney Kinney</a>, <a href="/search/cs?searchtype=author&query=Bhagia%2C+A">Akshita Bhagia</a>, <a href="/search/cs?searchtype=author&query=Schwenk%2C+D">Dustin Schwenk</a>, <a href="/search/cs?searchtype=author&query=Atkinson%2C+D">David Atkinson</a>, <a href="/search/cs?searchtype=author&query=Authur%2C+R">Russell Authur</a>, <a href="/search/cs?searchtype=author&query=Bogin%2C+B">Ben Bogin</a>, <a href="/search/cs?searchtype=author&query=Chandu%2C+K">Khyathi Chandu</a>, <a href="/search/cs?searchtype=author&query=Dumas%2C+J">Jennifer Dumas</a>, <a href="/search/cs?searchtype=author&query=Elazar%2C+Y">Yanai Elazar</a>, <a href="/search/cs?searchtype=author&query=Hofmann%2C+V">Valentin Hofmann</a>, <a href="/search/cs?searchtype=author&query=Jha%2C+A+H">Ananya Harsh Jha</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+S">Sachin Kumar</a>, <a href="/search/cs?searchtype=author&query=Lucy%2C+L">Li Lucy</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+X">Xinxi Lyu</a>, <a href="/search/cs?searchtype=author&query=Lambert%2C+N">Nathan Lambert</a>, <a href="/search/cs?searchtype=author&query=Magnusson%2C+I">Ian Magnusson</a>, <a href="/search/cs?searchtype=author&query=Morrison%2C+J">Jacob Morrison</a>, <a href="/search/cs?searchtype=author&query=Muennighoff%2C+N">Niklas Muennighoff</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Aakanksha Naik</a>, <a href="/search/cs?searchtype=author&query=Nam%2C+C">Crystal Nam</a>, <a href="/search/cs?searchtype=author&query=Peters%2C+M+E">Matthew E. Peters</a>, <a href="/search/cs?searchtype=author&query=Ravichander%2C+A">Abhilasha Ravichander</a>, <a href="/search/cs?searchtype=author&query=Richardson%2C+K">Kyle Richardson</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+Z">Zejiang Shen</a> , et al. (11 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.00159v2-abstract-short" style="display: inline;"> Information about pretraining corpora used to train the current best-performing language models is seldom discussed: commercial models rarely detail their data, and even open models are often released without accompanying training data or recipes to reproduce them. As a result, it is challenging to conduct and advance scientific research on language modeling, such as understanding how training dat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.00159v2-abstract-full').style.display = 'inline'; document.getElementById('2402.00159v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.00159v2-abstract-full" style="display: none;"> Information about pretraining corpora used to train the current best-performing language models is seldom discussed: commercial models rarely detail their data, and even open models are often released without accompanying training data or recipes to reproduce them. As a result, it is challenging to conduct and advance scientific research on language modeling, such as understanding how training data impacts model capabilities and limitations. To facilitate scientific research on language model pretraining, we curate and release Dolma, a three-trillion-token English corpus, built from a diverse mixture of web content, scientific papers, code, public-domain books, social media, and encyclopedic materials. We extensively document Dolma, including its design principles, details about its construction, and a summary of its contents. We present analyses and experimental results on intermediate states of Dolma to share what we have learned about important data curation practices. Finally, we open-source our data curation toolkit to enable reproduction of our work as well as support further research in large-scale data curation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.00159v2-abstract-full').style.display = 'none'; document.getElementById('2402.00159v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at ACL 2024; Dataset: https://hf.co/datasets/allenai/dolma; Code: https://github.com/allenai/dolma</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.11803">arXiv:2312.11803</a> <span> [<a href="https://arxiv.org/pdf/2312.11803">pdf</a>, <a href="https://arxiv.org/format/2312.11803">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> NLP for Maternal Healthcare: Perspectives and Guiding Principles in the Age of LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Antoniak%2C+M">Maria Antoniak</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Aakanksha Naik</a>, <a href="/search/cs?searchtype=author&query=Alvarado%2C+C+S">Carla S. Alvarado</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L+L">Lucy Lu Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+I+Y">Irene Y. Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.11803v2-abstract-short" style="display: inline;"> Ethical frameworks for the use of natural language processing (NLP) are urgently needed to shape how large language models (LLMs) and similar tools are used for healthcare applications. Healthcare faces existing challenges including the balance of power in clinician-patient relationships, systemic health disparities, historical injustices, and economic constraints. Drawing directly from the voices… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.11803v2-abstract-full').style.display = 'inline'; document.getElementById('2312.11803v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.11803v2-abstract-full" style="display: none;"> Ethical frameworks for the use of natural language processing (NLP) are urgently needed to shape how large language models (LLMs) and similar tools are used for healthcare applications. Healthcare faces existing challenges including the balance of power in clinician-patient relationships, systemic health disparities, historical injustices, and economic constraints. Drawing directly from the voices of those most affected, and focusing on a case study of a specific healthcare setting, we propose a set of guiding principles for the use of NLP in maternal healthcare. We led an interactive session centered on an LLM-based chatbot demonstration during a full-day workshop with 39 participants, and additionally surveyed 30 healthcare workers and 30 birthing people about their values, needs, and perceptions of NLP tools in the context of maternal health. We conducted quantitative and qualitative analyses of the survey results and interactive discussions to consolidate our findings into a set of guiding principles. We propose nine principles for ethical use of NLP for maternal healthcare, grouped into three themes: (i) recognizing contextual significance (ii) holistic measurements, and (iii) who/what is valued. For each principle, we describe its underlying rationale and provide practical advice. This set of principles can provide a methodological pattern for other researchers and serve as a resource to practitioners working on maternal health and other healthcare fields to emphasize the importance of technical nuance, historical context, and inclusive design when developing NLP technologies for clinical use. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.11803v2-abstract-full').style.display = 'none'; document.getElementById('2312.11803v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.09736">arXiv:2311.09736</a> <span> [<a href="https://arxiv.org/pdf/2311.09736">pdf</a>, <a href="https://arxiv.org/format/2311.09736">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> CARE: Extracting Experimental Findings From Clinical Literature </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Naik%2C+A">Aakanksha Naik</a>, <a href="/search/cs?searchtype=author&query=Kuehl%2C+B">Bailey Kuehl</a>, <a href="/search/cs?searchtype=author&query=Bransom%2C+E">Erin Bransom</a>, <a href="/search/cs?searchtype=author&query=Downey%2C+D">Doug Downey</a>, <a href="/search/cs?searchtype=author&query=Hope%2C+T">Tom Hope</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.09736v2-abstract-short" style="display: inline;"> Extracting fine-grained experimental findings from literature can provide dramatic utility for scientific applications. Prior work has developed annotation schemas and datasets for limited aspects of this problem, failing to capture the real-world complexity and nuance required. Focusing on biomedicine, this work presents CARE -- a new IE dataset for the task of extracting clinical findings. We de… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.09736v2-abstract-full').style.display = 'inline'; document.getElementById('2311.09736v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.09736v2-abstract-full" style="display: none;"> Extracting fine-grained experimental findings from literature can provide dramatic utility for scientific applications. Prior work has developed annotation schemas and datasets for limited aspects of this problem, failing to capture the real-world complexity and nuance required. Focusing on biomedicine, this work presents CARE -- a new IE dataset for the task of extracting clinical findings. We develop a new annotation schema capturing fine-grained findings as n-ary relations between entities and attributes, which unifies phenomena challenging for current IE systems such as discontinuous entity spans, nested relations, variable arity n-ary relations and numeric results in a single schema. We collect extensive annotations for 700 abstracts from two sources: clinical trials and case reports. We also demonstrate the generalizability of our schema to the computer science and materials science domains. We benchmark state-of-the-art IE systems on CARE, showing that even models such as GPT4 struggle. We release our resources to advance research on extracting and aggregating literature findings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.09736v2-abstract-full').style.display = 'none'; document.getElementById('2311.09736v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear at NAACL Findings 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.09564">arXiv:2311.09564</a> <span> [<a href="https://arxiv.org/pdf/2311.09564">pdf</a>, <a href="https://arxiv.org/format/2311.09564">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> LongBoX: Evaluating Transformers on Long-Sequence Clinical Tasks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Mihir Parmar</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Aakanksha Naik</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+H">Himanshu Gupta</a>, <a href="/search/cs?searchtype=author&query=Agrawal%2C+D">Disha Agrawal</a>, <a href="/search/cs?searchtype=author&query=Baral%2C+C">Chitta Baral</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.09564v1-abstract-short" style="display: inline;"> Many large language models (LLMs) for medicine have largely been evaluated on short texts, and their ability to handle longer sequences such as a complete electronic health record (EHR) has not been systematically explored. Assessing these models on long sequences is crucial since prior work in the general domain has demonstrated performance degradation of LLMs on longer texts. Motivated by this,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.09564v1-abstract-full').style.display = 'inline'; document.getElementById('2311.09564v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.09564v1-abstract-full" style="display: none;"> Many large language models (LLMs) for medicine have largely been evaluated on short texts, and their ability to handle longer sequences such as a complete electronic health record (EHR) has not been systematically explored. Assessing these models on long sequences is crucial since prior work in the general domain has demonstrated performance degradation of LLMs on longer texts. Motivated by this, we introduce LongBoX, a collection of seven medical datasets in text-to-text format, designed to investigate model performance on long sequences. Preliminary experiments reveal that both medical LLMs (e.g., BioGPT) and strong general domain LLMs (e.g., FLAN-T5) struggle on this benchmark. We further evaluate two techniques designed for long-sequence handling: (i) local-global attention, and (ii) Fusion-in-Decoder (FiD). Our results demonstrate mixed results with long-sequence handling - while scores on some datasets increase, there is substantial room for improvement. We hope that LongBoX facilitates the development of more effective long-sequence techniques for the medical domain. Data and source code are available at https://github.com/Mihir3009/LongBoX. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.09564v1-abstract-full').style.display = 'none'; document.getElementById('2311.09564v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.00317">arXiv:2311.00317</a> <span> [<a href="https://arxiv.org/pdf/2311.00317">pdf</a>, <a href="https://arxiv.org/format/2311.00317">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Data Augmentation for Code Translation with Comparable Corpora and Multiple References </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xie%2C+Y">Yiqing Xie</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Atharva Naik</a>, <a href="/search/cs?searchtype=author&query=Fried%2C+D">Daniel Fried</a>, <a href="/search/cs?searchtype=author&query=Rose%2C+C">Carolyn Rose</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.00317v2-abstract-short" style="display: inline;"> One major challenge of translating code between programming languages is that parallel training data is often limited. To overcome this challenge, we present two data augmentation techniques, one that builds comparable corpora (i.e., code pairs with similar functionality), and another that augments existing parallel data with multiple reference translations. Specifically, we build and analyze mult… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.00317v2-abstract-full').style.display = 'inline'; document.getElementById('2311.00317v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.00317v2-abstract-full" style="display: none;"> One major challenge of translating code between programming languages is that parallel training data is often limited. To overcome this challenge, we present two data augmentation techniques, one that builds comparable corpora (i.e., code pairs with similar functionality), and another that augments existing parallel data with multiple reference translations. Specifically, we build and analyze multiple types of comparable corpora, including programs generated from natural language documentation using a code generation model. Furthermore, to reduce overfitting to a single reference translation, we automatically generate additional translation references for available parallel data and filter the translations by unit tests, which increases variation in target translations. Experiments show that our data augmentation techniques significantly improve CodeT5 for translation between Java, Python, and C++ by an average of 7.5% Computational Accuracy (CA@1), which verifies the correctness of translations by execution. The code is available at https://github.com/Veronicium/CMTrans. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.00317v2-abstract-full').style.display = 'none'; document.getElementById('2311.00317v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP 2023 Findings (with minor updates on the flowcharts)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.06686">arXiv:2308.06686</a> <span> [<a href="https://arxiv.org/pdf/2308.06686">pdf</a>, <a href="https://arxiv.org/format/2308.06686">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> TorchQL: A Programming Framework for Integrity Constraints in Machine Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Naik%2C+A">Aaditya Naik</a>, <a href="/search/cs?searchtype=author&query=Stein%2C+A">Adam Stein</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yinjun Wu</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+M">Mayur Naik</a>, <a href="/search/cs?searchtype=author&query=Wong%2C+E">Eric Wong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.06686v4-abstract-short" style="display: inline;"> Finding errors in machine learning applications requires a thorough exploration of their behavior over data. Existing approaches used by practitioners are often ad-hoc and lack the abstractions needed to scale this process. We present TorchQL, a programming framework to evaluate and improve the correctness of machine learning applications. TorchQL allows users to write queries to specify and check… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.06686v4-abstract-full').style.display = 'inline'; document.getElementById('2308.06686v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.06686v4-abstract-full" style="display: none;"> Finding errors in machine learning applications requires a thorough exploration of their behavior over data. Existing approaches used by practitioners are often ad-hoc and lack the abstractions needed to scale this process. We present TorchQL, a programming framework to evaluate and improve the correctness of machine learning applications. TorchQL allows users to write queries to specify and check integrity constraints over machine learning models and datasets. It seamlessly integrates relational algebra with functional programming to allow for highly expressive queries using only eight intuitive operators. We evaluate TorchQL on diverse use-cases including finding critical temporal inconsistencies in objects detected across video frames in autonomous driving, finding data imputation errors in time-series medical records, finding data labeling errors in real-world images, and evaluating biases and constraining outputs of language models. Our experiments show that TorchQL enables up to 13x faster query executions than baselines like Pandas and MongoDB, and up to 40% shorter queries than native Python. We also conduct a user study and find that TorchQL is natural enough for developers familiar with Python to specify complex integrity constraints. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.06686v4-abstract-full').style.display = 'none'; document.getElementById('2308.06686v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.11694">arXiv:2307.11694</a> <span> [<a href="https://arxiv.org/pdf/2307.11694">pdf</a>, <a href="https://arxiv.org/format/2307.11694">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Molecular Networks">q-bio.MN</span> </div> </div> <p class="title is-5 mathjax"> SynerGPT: In-Context Learning for Personalized Drug Synergy Prediction and Drug Design </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Edwards%2C+C">Carl Edwards</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Aakanksha Naik</a>, <a href="/search/cs?searchtype=author&query=Khot%2C+T">Tushar Khot</a>, <a href="/search/cs?searchtype=author&query=Burke%2C+M">Martin Burke</a>, <a href="/search/cs?searchtype=author&query=Ji%2C+H">Heng Ji</a>, <a href="/search/cs?searchtype=author&query=Hope%2C+T">Tom Hope</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.11694v2-abstract-short" style="display: inline;"> Predicting synergistic drug combinations can help accelerate discovery of cancer treatments, particularly therapies personalized to a patient's specific tumor via biopsied cells. In this paper, we propose a novel setting and models for in-context drug synergy learning. We are given a small "personalized dataset" of 10-20 drug synergy relationships in the context of specific cancer cell targets. Ou… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.11694v2-abstract-full').style.display = 'inline'; document.getElementById('2307.11694v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.11694v2-abstract-full" style="display: none;"> Predicting synergistic drug combinations can help accelerate discovery of cancer treatments, particularly therapies personalized to a patient's specific tumor via biopsied cells. In this paper, we propose a novel setting and models for in-context drug synergy learning. We are given a small "personalized dataset" of 10-20 drug synergy relationships in the context of specific cancer cell targets. Our goal is to predict additional drug synergy relationships in that context. Inspired by recent work that pre-trains a GPT language model (LM) to "in-context learn" common function classes, we devise novel pre-training schemes that enable a GPT model to in-context learn "drug synergy functions". Our model -- which does not use any textual corpora, molecular fingerprints, protein interaction or any other domain-specific knowledge -- is able to achieve competitive results. We further integrate our in-context approach with a genetic algorithm to optimize model prompts and select synergy candidates to test after conducting a patient biopsy. Finally, we explore a novel task of inverse drug design which can potentially enable the design of drugs that synergize specifically to target a given patient's "personalized dataset". Our findings can potentially have an important impact on precision cancer medicine, and also raise intriguing questions on non-textual pre-training for LMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.11694v2-abstract-full').style.display = 'none'; document.getElementById('2307.11694v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.01155">arXiv:2307.01155</a> <span> [<a href="https://arxiv.org/pdf/2307.01155">pdf</a>, <a href="https://arxiv.org/format/2307.01155">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Finance">q-fin.CP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> </div> </div> <p class="title is-5 mathjax"> From Portfolio Optimization to Quantum Blockchain and Security: A Systematic Review of Quantum Computing in Finance </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Naik%2C+A">Abha Naik</a>, <a href="/search/cs?searchtype=author&query=Yeniaras%2C+E">Esra Yeniaras</a>, <a href="/search/cs?searchtype=author&query=Hellstern%2C+G">Gerhard Hellstern</a>, <a href="/search/cs?searchtype=author&query=Prasad%2C+G">Grishma Prasad</a>, <a href="/search/cs?searchtype=author&query=Vishwakarma%2C+S+K+L+P">Sanjay Kumar Lalta Prasad Vishwakarma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.01155v1-abstract-short" style="display: inline;"> In this paper, we provide an overview of the recent work in the quantum finance realm from various perspectives. The applications in consideration are Portfolio Optimization, Fraud Detection, and Monte Carlo methods for derivative pricing and risk calculation. Furthermore, we give a comprehensive overview of the applications of quantum computing in the field of blockchain technology which is a mai… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.01155v1-abstract-full').style.display = 'inline'; document.getElementById('2307.01155v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.01155v1-abstract-full" style="display: none;"> In this paper, we provide an overview of the recent work in the quantum finance realm from various perspectives. The applications in consideration are Portfolio Optimization, Fraud Detection, and Monte Carlo methods for derivative pricing and risk calculation. Furthermore, we give a comprehensive overview of the applications of quantum computing in the field of blockchain technology which is a main concept in fintech. In that sense, we first introduce the general overview of blockchain with its main cryptographic primitives such as digital signature algorithms, hash functions, and random number generators as well as the security vulnerabilities of blockchain technologies after the merge of quantum computers considering Shor's quantum factoring and Grover's quantum search algorithms. We then discuss the privacy preserving quantum-resistant blockchain systems via threshold signatures, ring signatures, and zero-knowledge proof systems i.e. ZK-SNARKs in quantum resistant blockchains. After emphasizing the difference between the quantum-resistant blockchain and quantum-safe blockchain we mention the security countermeasures to take against the possible quantumized attacks aiming these systems. We finalize our discussion with quantum blockchain, efficient quantum mining and necessary infrastructures for constructing such systems based on quantum computing. This review has the intention to be a bridge to fill the gap between quantum computing and one of its most prominent application realms: Finance. We provide the state-of-the-art results in the intersection of finance and quantum technology for both industrial practitioners and academicians. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.01155v1-abstract-full').style.display = 'none'; document.getElementById('2307.01155v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">64 pages. arXiv admin note: text overlap with arXiv:2211.13191 by other authors</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.11843">arXiv:2306.11843</a> <span> [<a href="https://arxiv.org/pdf/2306.11843">pdf</a>, <a href="https://arxiv.org/format/2306.11843">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Retrieval-Based Transformer for Table Augmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Glass%2C+M">Michael Glass</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xueqing Wu</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A+R">Ankita Rajaram Naik</a>, <a href="/search/cs?searchtype=author&query=Rossiello%2C+G">Gaetano Rossiello</a>, <a href="/search/cs?searchtype=author&query=Gliozzo%2C+A">Alfio Gliozzo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.11843v1-abstract-short" style="display: inline;"> Data preparation, also called data wrangling, is considered one of the most expensive and time-consuming steps when performing analytics or building machine learning models. Preparing data typically involves collecting and merging data from complex heterogeneous, and often large-scale data sources, such as data lakes. In this paper, we introduce a novel approach toward automatic data wrangling in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.11843v1-abstract-full').style.display = 'inline'; document.getElementById('2306.11843v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.11843v1-abstract-full" style="display: none;"> Data preparation, also called data wrangling, is considered one of the most expensive and time-consuming steps when performing analytics or building machine learning models. Preparing data typically involves collecting and merging data from complex heterogeneous, and often large-scale data sources, such as data lakes. In this paper, we introduce a novel approach toward automatic data wrangling in an attempt to alleviate the effort of end-users, e.g. data analysts, in structuring dynamic views from data lakes in the form of tabular data. We aim to address table augmentation tasks, including row/column population and data imputation. Given a corpus of tables, we propose a retrieval augmented self-trained transformer model. Our self-learning strategy consists in randomly ablating tables from the corpus and training the retrieval-based model to reconstruct the original values or headers given the partial tables as input. We adopt this strategy to first train the dense neural retrieval model encoding table-parts to vectors, and then the end-to-end model trained to perform table augmentation tasks. We test on EntiTables, the standard benchmark for table augmentation, as well as introduce a new benchmark to advance further research: WebTables. Our model consistently and substantially outperforms both supervised statistical methods and the current state-of-the-art transformer-based models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.11843v1-abstract-full').style.display = 'none'; document.getElementById('2306.11843v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Findings of ACL 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.14965">arXiv:2305.14965</a> <span> [<a href="https://arxiv.org/pdf/2305.14965">pdf</a>, <a href="https://arxiv.org/format/2305.14965">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Tricking LLMs into Disobedience: Formalizing, Analyzing, and Detecting Jailbreaks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Rao%2C+A">Abhinav Rao</a>, <a href="/search/cs?searchtype=author&query=Vashistha%2C+S">Sachin Vashistha</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Atharva Naik</a>, <a href="/search/cs?searchtype=author&query=Aditya%2C+S">Somak Aditya</a>, <a href="/search/cs?searchtype=author&query=Choudhury%2C+M">Monojit Choudhury</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.14965v4-abstract-short" style="display: inline;"> Recent explorations with commercial Large Language Models (LLMs) have shown that non-expert users can jailbreak LLMs by simply manipulating their prompts; resulting in degenerate output behavior, privacy and security breaches, offensive outputs, and violations of content regulator policies. Limited studies have been conducted to formalize and analyze these attacks and their mitigations. We bridge… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.14965v4-abstract-full').style.display = 'inline'; document.getElementById('2305.14965v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.14965v4-abstract-full" style="display: none;"> Recent explorations with commercial Large Language Models (LLMs) have shown that non-expert users can jailbreak LLMs by simply manipulating their prompts; resulting in degenerate output behavior, privacy and security breaches, offensive outputs, and violations of content regulator policies. Limited studies have been conducted to formalize and analyze these attacks and their mitigations. We bridge this gap by proposing a formalism and a taxonomy of known (and possible) jailbreaks. We survey existing jailbreak methods and their effectiveness on open-source and commercial LLMs (such as GPT-based models, OPT, BLOOM, and FLAN-T5-XXL). We further discuss the challenges of jailbreak detection in terms of their effectiveness against known attacks. For further analysis, we release a dataset of model outputs across 3700 jailbreak prompts over 4 tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.14965v4-abstract-full').style.display = 'none'; document.getElementById('2305.14965v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at LREC-COLING 2024 - The 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.00366">arXiv:2305.00366</a> <span> [<a href="https://arxiv.org/pdf/2305.00366">pdf</a>, <a href="https://arxiv.org/format/2305.00366">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> S2abEL: A Dataset for Entity Linking from Scientific Tables </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lou%2C+Y">Yuze Lou</a>, <a href="/search/cs?searchtype=author&query=Kuehl%2C+B">Bailey Kuehl</a>, <a href="/search/cs?searchtype=author&query=Bransom%2C+E">Erin Bransom</a>, <a href="/search/cs?searchtype=author&query=Feldman%2C+S">Sergey Feldman</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Aakanksha Naik</a>, <a href="/search/cs?searchtype=author&query=Downey%2C+D">Doug Downey</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.00366v1-abstract-short" style="display: inline;"> Entity linking (EL) is the task of linking a textual mention to its corresponding entry in a knowledge base, and is critical for many knowledge-intensive NLP applications. When applied to tables in scientific papers, EL is a step toward large-scale scientific knowledge bases that could enable advanced scientific question answering and analytics. We present the first dataset for EL in scientific ta… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.00366v1-abstract-full').style.display = 'inline'; document.getElementById('2305.00366v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.00366v1-abstract-full" style="display: none;"> Entity linking (EL) is the task of linking a textual mention to its corresponding entry in a knowledge base, and is critical for many knowledge-intensive NLP applications. When applied to tables in scientific papers, EL is a step toward large-scale scientific knowledge bases that could enable advanced scientific question answering and analytics. We present the first dataset for EL in scientific tables. EL for scientific tables is especially challenging because scientific knowledge bases can be very incomplete, and disambiguating table mentions typically requires understanding the papers's tet in addition to the table. Our dataset, S2abEL, focuses on EL in machine learning results tables and includes hand-labeled cell types, attributed sources, and entity links from the PaperswithCode taxonomy for 8,429 cells from 732 tables. We introduce a neural baseline method designed for EL on scientific tables containing many out-of-knowledge-base mentions, and show that it significantly outperforms a state-of-the-art generic table EL method. The best baselines fall below human performance, and our analysis highlights avenues for improvement. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.00366v1-abstract-full').style.display = 'none'; document.getElementById('2305.00366v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.14334">arXiv:2303.14334</a> <span> [<a href="https://arxiv.org/pdf/2303.14334">pdf</a>, <a href="https://arxiv.org/format/2303.14334">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> The Semantic Reader Project: Augmenting Scholarly Documents through AI-Powered Interactive Reading Interfaces </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lo%2C+K">Kyle Lo</a>, <a href="/search/cs?searchtype=author&query=Chang%2C+J+C">Joseph Chee Chang</a>, <a href="/search/cs?searchtype=author&query=Head%2C+A">Andrew Head</a>, <a href="/search/cs?searchtype=author&query=Bragg%2C+J">Jonathan Bragg</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+A+X">Amy X. Zhang</a>, <a href="/search/cs?searchtype=author&query=Trier%2C+C">Cassidy Trier</a>, <a href="/search/cs?searchtype=author&query=Anastasiades%2C+C">Chloe Anastasiades</a>, <a href="/search/cs?searchtype=author&query=August%2C+T">Tal August</a>, <a href="/search/cs?searchtype=author&query=Authur%2C+R">Russell Authur</a>, <a href="/search/cs?searchtype=author&query=Bragg%2C+D">Danielle Bragg</a>, <a href="/search/cs?searchtype=author&query=Bransom%2C+E">Erin Bransom</a>, <a href="/search/cs?searchtype=author&query=Cachola%2C+I">Isabel Cachola</a>, <a href="/search/cs?searchtype=author&query=Candra%2C+S">Stefan Candra</a>, <a href="/search/cs?searchtype=author&query=Chandrasekhar%2C+Y">Yoganand Chandrasekhar</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yen-Sung Chen</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+E+Y">Evie Yu-Yen Cheng</a>, <a href="/search/cs?searchtype=author&query=Chou%2C+Y">Yvonne Chou</a>, <a href="/search/cs?searchtype=author&query=Downey%2C+D">Doug Downey</a>, <a href="/search/cs?searchtype=author&query=Evans%2C+R">Rob Evans</a>, <a href="/search/cs?searchtype=author&query=Fok%2C+R">Raymond Fok</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+F">Fangzhou Hu</a>, <a href="/search/cs?searchtype=author&query=Huff%2C+R">Regan Huff</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+D">Dongyeop Kang</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+T+S">Tae Soo Kim</a>, <a href="/search/cs?searchtype=author&query=Kinney%2C+R">Rodney Kinney</a> , et al. (30 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.14334v2-abstract-short" style="display: inline;"> Scholarly publications are key to the transfer of knowledge from scholars to others. However, research papers are information-dense, and as the volume of the scientific literature grows, the need for new technology to support the reading process grows. In contrast to the process of finding papers, which has been transformed by Internet technology, the experience of reading research papers has chan… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.14334v2-abstract-full').style.display = 'inline'; document.getElementById('2303.14334v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.14334v2-abstract-full" style="display: none;"> Scholarly publications are key to the transfer of knowledge from scholars to others. However, research papers are information-dense, and as the volume of the scientific literature grows, the need for new technology to support the reading process grows. In contrast to the process of finding papers, which has been transformed by Internet technology, the experience of reading research papers has changed little in decades. The PDF format for sharing research papers is widely used due to its portability, but it has significant downsides including: static content, poor accessibility for low-vision readers, and difficulty reading on mobile devices. This paper explores the question "Can recent advances in AI and HCI power intelligent, interactive, and accessible reading interfaces -- even for legacy PDFs?" We describe the Semantic Reader Project, a collaborative effort across multiple institutions to explore automatic creation of dynamic reading interfaces for research papers. Through this project, we've developed ten research prototype interfaces and conducted usability studies with more than 300 participants and real-world users showing improved reading experiences for scholars. We've also released a production reading interface for research papers that will incorporate the best features as they mature. We structure this paper around challenges scholars and the public face when reading research papers -- Discovery, Efficiency, Comprehension, Synthesis, and Accessibility -- and present an overview of our progress and remaining open challenges. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.14334v2-abstract-full').style.display = 'none'; document.getElementById('2303.14334v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.01433">arXiv:2303.01433</a> <span> [<a href="https://arxiv.org/pdf/2303.01433">pdf</a>, <a href="https://arxiv.org/format/2303.01433">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Do Machine Learning Models Learn Statistical Rules Inferred from Data? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Naik%2C+A">Aaditya Naik</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yinjun Wu</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+M">Mayur Naik</a>, <a href="/search/cs?searchtype=author&query=Wong%2C+E">Eric Wong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.01433v2-abstract-short" style="display: inline;"> Machine learning models can make critical errors that are easily hidden within vast amounts of data. Such errors often run counter to rules based on human intuition. However, rules based on human knowledge are challenging to scale or to even formalize. We thereby seek to infer statistical rules from the data and quantify the extent to which a model has learned them. We propose a framework SQRL tha… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.01433v2-abstract-full').style.display = 'inline'; document.getElementById('2303.01433v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.01433v2-abstract-full" style="display: none;"> Machine learning models can make critical errors that are easily hidden within vast amounts of data. Such errors often run counter to rules based on human intuition. However, rules based on human knowledge are challenging to scale or to even formalize. We thereby seek to infer statistical rules from the data and quantify the extent to which a model has learned them. We propose a framework SQRL that integrates logic-based methods with statistical inference to derive these rules from a model's training data without supervision. We further show how to adapt models at test time to reduce rule violations and produce more coherent predictions. SQRL generates up to 300K rules over datasets from vision, tabular, and language settings. We uncover up to 158K violations of those rules by state-of-the-art models for classification, object detection, and data imputation. Test-time adaptation reduces these violations by up to 68.7% with relative performance improvement up to 32%. SQRL is available at https://github.com/DebugML/sqrl. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.01433v2-abstract-full').style.display = 'none'; document.getElementById('2303.01433v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.06754">arXiv:2302.06754</a> <span> [<a href="https://arxiv.org/pdf/2302.06754">pdf</a>, <a href="https://arxiv.org/format/2302.06754">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Digital Libraries">cs.DL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3544548.3580841">10.1145/3544548.3580841 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Relatedly: Scaffolding Literature Reviews with Existing Related Work Sections </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Palani%2C+S">Srishti Palani</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Aakanksha Naik</a>, <a href="/search/cs?searchtype=author&query=Downey%2C+D">Doug Downey</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+A+X">Amy X. Zhang</a>, <a href="/search/cs?searchtype=author&query=Bragg%2C+J">Jonathan Bragg</a>, <a href="/search/cs?searchtype=author&query=Chang%2C+J+C">Joseph Chee Chang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.06754v1-abstract-short" style="display: inline;"> Scholars who want to research a scientific topic must take time to read, extract meaning, and identify connections across many papers. As scientific literature grows, this becomes increasingly challenging. Meanwhile, authors summarize prior research in papers' related work sections, though this is scoped to support a single paper. A formative study found that while reading multiple related work pa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.06754v1-abstract-full').style.display = 'inline'; document.getElementById('2302.06754v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.06754v1-abstract-full" style="display: none;"> Scholars who want to research a scientific topic must take time to read, extract meaning, and identify connections across many papers. As scientific literature grows, this becomes increasingly challenging. Meanwhile, authors summarize prior research in papers' related work sections, though this is scoped to support a single paper. A formative study found that while reading multiple related work paragraphs helps overview a topic, it is hard to navigate overlapping and diverging references and research foci. In this work, we design a system, Relatedly, that scaffolds exploring and reading multiple related work paragraphs on a topic, with features including dynamic re-ranking and highlighting to spotlight unexplored dissimilar information, auto-generated descriptive paragraph headings, and low-lighting of redundant information. From a within-subjects user study (n=15), we found that scholars generate more coherent, insightful, and comprehensive topic outlines using Relatedly compared to a baseline paper list. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.06754v1-abstract-full').style.display = 'none'; document.getElementById('2302.06754v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.00568">arXiv:2209.00568</a> <span> [<a href="https://arxiv.org/pdf/2209.00568">pdf</a>, <a href="https://arxiv.org/format/2209.00568">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3627673.3679520">10.1145/3627673.3679520 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Distilling Multi-Scale Knowledge for Event Temporal Relation Extraction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yao%2C+H">Hao-Ren Yao</a>, <a href="/search/cs?searchtype=author&query=Breitfeller%2C+L">Luke Breitfeller</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Aakanksha Naik</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+C">Chunxiao Zhou</a>, <a href="/search/cs?searchtype=author&query=Rose%2C+C">Carolyn Rose</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.00568v3-abstract-short" style="display: inline;"> Event Temporal Relation Extraction (ETRE) is paramount but challenging. Within a discourse, event pairs are situated at different distances or the so-called proximity bands. The temporal ordering communicated about event pairs where at more remote (i.e., ``long'') or less remote (i.e., ``short'') proximity bands are encoded differently. SOTA models have tended to perform well on events situated at… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.00568v3-abstract-full').style.display = 'inline'; document.getElementById('2209.00568v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.00568v3-abstract-full" style="display: none;"> Event Temporal Relation Extraction (ETRE) is paramount but challenging. Within a discourse, event pairs are situated at different distances or the so-called proximity bands. The temporal ordering communicated about event pairs where at more remote (i.e., ``long'') or less remote (i.e., ``short'') proximity bands are encoded differently. SOTA models have tended to perform well on events situated at either short or long proximity bands, but not both. Nonetheless, real-world, natural texts contain all types of temporal event-pairs. In this paper, we present MulCo: Distilling Multi-Scale Knowledge via Contrastive Learning, a knowledge co-distillation approach that shares knowledge across multiple event pair proximity bands to improve performance on all types of temporal datasets. Our experimental results show that MulCo successfully integrates linguistic cues pertaining to temporal reasoning across both short and long proximity bands and achieves new state-of-the-art results on several ETRE benchmark datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.00568v3-abstract-full').style.display = 'none'; document.getElementById('2209.00568v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to CIKM 2024 Full Research Track, camera ready version</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.05950">arXiv:2208.05950</a> <span> [<a href="https://arxiv.org/pdf/2208.05950">pdf</a>, <a href="https://arxiv.org/format/2208.05950">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Programming Languages">cs.PL</span> </div> </div> <p class="title is-5 mathjax"> Interactive Code Generation via Test-Driven User-Intent Formalization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lahiri%2C+S+K">Shuvendu K. Lahiri</a>, <a href="/search/cs?searchtype=author&query=Fakhoury%2C+S">Sarah Fakhoury</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Aaditya Naik</a>, <a href="/search/cs?searchtype=author&query=Sakkas%2C+G">Georgios Sakkas</a>, <a href="/search/cs?searchtype=author&query=Chakraborty%2C+S">Saikat Chakraborty</a>, <a href="/search/cs?searchtype=author&query=Musuvathi%2C+M">Madanlal Musuvathi</a>, <a href="/search/cs?searchtype=author&query=Choudhury%2C+P">Piali Choudhury</a>, <a href="/search/cs?searchtype=author&query=von+Veh%2C+C">Curtis von Veh</a>, <a href="/search/cs?searchtype=author&query=Inala%2C+J+P">Jeevana Priya Inala</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chenglong Wang</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+J">Jianfeng Gao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.05950v2-abstract-short" style="display: inline;"> Large language models (LLMs) have shown great potential in automating significant aspects of coding by producing natural code from informal natural language (NL) intent. However, when interacting with LLMs, users have no guarantees that the code suggestions produced correctly satisfy the intent they provided. In fact, it is hard to define a notion of correctness since natural language can be ambig… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.05950v2-abstract-full').style.display = 'inline'; document.getElementById('2208.05950v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.05950v2-abstract-full" style="display: none;"> Large language models (LLMs) have shown great potential in automating significant aspects of coding by producing natural code from informal natural language (NL) intent. However, when interacting with LLMs, users have no guarantees that the code suggestions produced correctly satisfy the intent they provided. In fact, it is hard to define a notion of correctness since natural language can be ambiguous and lacks a formal semantics. In this paper, we propose the workflow of {\it interactive test-driven code generation}, which leverages lightweight user feedback to (a) formalize the user intent using generated tests that can be useful for debugging, and (b) produce an improved set of code suggestions by pruning and ranking candidate code suggestions. We describe a language-agnostic abstract algorithm and a concrete implementation TiCoder. We perform an automated evaluation of TiCoder on the \emph{MBPP} and \emph{HumanEval} code generation benchmarks. Our results are promising with using the OpenAI Codex LLM: our best algorithm improves the \passk{1} code generation accuracy (in absolute percentages) between $22.49\%$ to $37.71\%$ for MBPP and between $24.79\%$ to $53.98\%$ for HumanEval using between 1 to 5 simulated user queries. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.05950v2-abstract-full').style.display = 'none'; document.getElementById('2208.05950v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.05297">arXiv:2208.05297</a> <span> [<a href="https://arxiv.org/pdf/2208.05297">pdf</a>, <a href="https://arxiv.org/format/2208.05297">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Learning to Improve Code Efficiency </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+B">Binghong Chen</a>, <a href="/search/cs?searchtype=author&query=Tarlow%2C+D">Daniel Tarlow</a>, <a href="/search/cs?searchtype=author&query=Swersky%2C+K">Kevin Swersky</a>, <a href="/search/cs?searchtype=author&query=Maas%2C+M">Martin Maas</a>, <a href="/search/cs?searchtype=author&query=Heiber%2C+P">Pablo Heiber</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Ashish Naik</a>, <a href="/search/cs?searchtype=author&query=Hashemi%2C+M">Milad Hashemi</a>, <a href="/search/cs?searchtype=author&query=Ranganathan%2C+P">Parthasarathy Ranganathan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.05297v1-abstract-short" style="display: inline;"> Improvements in the performance of computing systems, driven by Moore's Law, have transformed society. As such hardware-driven gains slow down, it becomes even more important for software developers to focus on performance and efficiency during development. While several studies have demonstrated the potential from such improved code efficiency (e.g., 2x better generational improvements compared t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.05297v1-abstract-full').style.display = 'inline'; document.getElementById('2208.05297v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.05297v1-abstract-full" style="display: none;"> Improvements in the performance of computing systems, driven by Moore's Law, have transformed society. As such hardware-driven gains slow down, it becomes even more important for software developers to focus on performance and efficiency during development. While several studies have demonstrated the potential from such improved code efficiency (e.g., 2x better generational improvements compared to hardware), unlocking these gains in practice has been challenging. Reasoning about algorithmic complexity and the interaction of coding patterns on hardware can be challenging for the average programmer, especially when combined with pragmatic constraints around development velocity and multi-person development. This paper seeks to address this problem. We analyze a large competitive programming dataset from the Google Code Jam competition and find that efficient code is indeed rare, with a 2x runtime difference between the median and the 90th percentile of solutions. We propose using machine learning to automatically provide prescriptive feedback in the form of hints, to guide programmers towards writing high-performance code. To automatically learn these hints from the dataset, we propose a novel discrete variational auto-encoder, where each discrete latent variable represents a different learned category of code-edit that increases performance. We show that this method represents the multi-modal space of code efficiency edits better than a sequence-to-sequence baseline and generates a distribution of more efficient solutions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.05297v1-abstract-full').style.display = 'none'; document.getElementById('2208.05297v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.04683">arXiv:2208.04683</a> <span> [<a href="https://arxiv.org/pdf/2208.04683">pdf</a>, <a href="https://arxiv.org/format/2208.04683">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> </div> </div> <p class="title is-5 mathjax"> Applying data technologies to combat AMR: current status, challenges, and opportunities on the way forward </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chindelevitch%2C+L">Leonid Chindelevitch</a>, <a href="/search/cs?searchtype=author&query=Jauneikaite%2C+E">Elita Jauneikaite</a>, <a href="/search/cs?searchtype=author&query=Wheeler%2C+N+E">Nicole E. Wheeler</a>, <a href="/search/cs?searchtype=author&query=Allel%2C+K">Kasim Allel</a>, <a href="/search/cs?searchtype=author&query=Ansiri-Asafoakaa%2C+B+Y">Bede Yaw Ansiri-Asafoakaa</a>, <a href="/search/cs?searchtype=author&query=Awuah%2C+W+A">Wireko A. Awuah</a>, <a href="/search/cs?searchtype=author&query=Bauer%2C+D+C">Denis C. Bauer</a>, <a href="/search/cs?searchtype=author&query=Beisken%2C+S">Stephan Beisken</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+K">Kara Fan</a>, <a href="/search/cs?searchtype=author&query=Grant%2C+G">Gary Grant</a>, <a href="/search/cs?searchtype=author&query=Graz%2C+M">Michael Graz</a>, <a href="/search/cs?searchtype=author&query=Khalaf%2C+Y">Yara Khalaf</a>, <a href="/search/cs?searchtype=author&query=Liyanapathirana%2C+V">Veranja Liyanapathirana</a>, <a href="/search/cs?searchtype=author&query=Montefusco-Pereira%2C+C">Carlos Montefusco-Pereira</a>, <a href="/search/cs?searchtype=author&query=Mugisha%2C+L">Lawrence Mugisha</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Atharv Naik</a>, <a href="/search/cs?searchtype=author&query=Nanono%2C+S">Sylvia Nanono</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+A">Anthony Nguyen</a>, <a href="/search/cs?searchtype=author&query=Rawson%2C+T">Timothy Rawson</a>, <a href="/search/cs?searchtype=author&query=Reddy%2C+K">Kessendri Reddy</a>, <a href="/search/cs?searchtype=author&query=Ruzante%2C+J+M">Juliana M. Ruzante</a>, <a href="/search/cs?searchtype=author&query=Schmider%2C+A">Anneke Schmider</a>, <a href="/search/cs?searchtype=author&query=Stocker%2C+R">Roman Stocker</a>, <a href="/search/cs?searchtype=author&query=Unruh%2C+L">Leonhardt Unruh</a>, <a href="/search/cs?searchtype=author&query=Waruingi%2C+D">Daniel Waruingi</a> , et al. (2 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.04683v2-abstract-short" style="display: inline;"> Antimicrobial resistance (AMR) is a growing public health threat, estimated to cause over 10 million deaths per year and cost the global economy 100 trillion USD by 2050 under status quo projections. These losses would mainly result from an increase in the morbidity and mortality from treatment failure, AMR infections during medical procedures, and a loss of quality of life attributed to AMR. Nume… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.04683v2-abstract-full').style.display = 'inline'; document.getElementById('2208.04683v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.04683v2-abstract-full" style="display: none;"> Antimicrobial resistance (AMR) is a growing public health threat, estimated to cause over 10 million deaths per year and cost the global economy 100 trillion USD by 2050 under status quo projections. These losses would mainly result from an increase in the morbidity and mortality from treatment failure, AMR infections during medical procedures, and a loss of quality of life attributed to AMR. Numerous interventions have been proposed to control the development of AMR and mitigate the risks posed by its spread. This paper reviews key aspects of bacterial AMR management and control which make essential use of data technologies such as artificial intelligence, machine learning, and mathematical and statistical modelling, fields that have seen rapid developments in this century. Although data technologies have become an integral part of biomedical research, their impact on AMR management has remained modest. We outline the use of data technologies to combat AMR, detailing recent advancements in four complementary categories: surveillance, prevention, diagnosis, and treatment. We provide an overview on current AMR control approaches using data technologies within biomedical research, clinical practice, and in the "One Health" context. We discuss the potential impact and challenges wider implementation of data technologies is facing in high-income as well as in low- and middle-income countries, and recommend concrete actions needed to allow these technologies to be more readily integrated within the healthcare and public health sectors. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.04683v2-abstract-full').style.display = 'none'; document.getElementById('2208.04683v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">65 pages, 3 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.1; J.3 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.06300">arXiv:2207.06300</a> <span> [<a href="https://arxiv.org/pdf/2207.06300">pdf</a>, <a href="https://arxiv.org/format/2207.06300">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Re2G: Retrieve, Rerank, Generate </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Glass%2C+M">Michael Glass</a>, <a href="/search/cs?searchtype=author&query=Rossiello%2C+G">Gaetano Rossiello</a>, <a href="/search/cs?searchtype=author&query=Chowdhury%2C+M+F+M">Md Faisal Mahbub Chowdhury</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A+R">Ankita Rajaram Naik</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+P">Pengshan Cai</a>, <a href="/search/cs?searchtype=author&query=Gliozzo%2C+A">Alfio Gliozzo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.06300v1-abstract-short" style="display: inline;"> As demonstrated by GPT-3 and T5, transformers grow in capability as parameter spaces become larger and larger. However, for tasks that require a large amount of knowledge, non-parametric memory allows models to grow dramatically with a sub-linear increase in computational cost and GPU memory requirements. Recent models such as RAG and REALM have introduced retrieval into conditional generation. Th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.06300v1-abstract-full').style.display = 'inline'; document.getElementById('2207.06300v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.06300v1-abstract-full" style="display: none;"> As demonstrated by GPT-3 and T5, transformers grow in capability as parameter spaces become larger and larger. However, for tasks that require a large amount of knowledge, non-parametric memory allows models to grow dramatically with a sub-linear increase in computational cost and GPU memory requirements. Recent models such as RAG and REALM have introduced retrieval into conditional generation. These models incorporate neural initial retrieval from a corpus of passages. We build on this line of research, proposing Re2G, which combines both neural initial retrieval and reranking into a BART-based sequence-to-sequence generation. Our reranking approach also permits merging retrieval results from sources with incomparable scores, enabling an ensemble of BM25 and neural initial retrieval. To train our system end-to-end, we introduce a novel variation of knowledge distillation to train the initial retrieval, reranker, and generation using only ground truth on the target sequence output. We find large gains in four diverse tasks: zero-shot slot filling, question answering, fact-checking, and dialog, with relative gains of 9% to 34% over the previous state-of-the-art on the KILT leaderboard. We make our code available as open source at https://github.com/IBM/kgi-slot-filling/tree/re2g. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.06300v1-abstract-full').style.display = 'none'; document.getElementById('2207.06300v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at NAACL 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2204.07705">arXiv:2204.07705</a> <span> [<a href="https://arxiv.org/pdf/2204.07705">pdf</a>, <a href="https://arxiv.org/format/2204.07705">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Super-NaturalInstructions: Generalization via Declarative Instructions on 1600+ NLP Tasks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yizhong Wang</a>, <a href="/search/cs?searchtype=author&query=Mishra%2C+S">Swaroop Mishra</a>, <a href="/search/cs?searchtype=author&query=Alipoormolabashi%2C+P">Pegah Alipoormolabashi</a>, <a href="/search/cs?searchtype=author&query=Kordi%2C+Y">Yeganeh Kordi</a>, <a href="/search/cs?searchtype=author&query=Mirzaei%2C+A">Amirreza Mirzaei</a>, <a href="/search/cs?searchtype=author&query=Arunkumar%2C+A">Anjana Arunkumar</a>, <a href="/search/cs?searchtype=author&query=Ashok%2C+A">Arjun Ashok</a>, <a href="/search/cs?searchtype=author&query=Dhanasekaran%2C+A+S">Arut Selvan Dhanasekaran</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Atharva Naik</a>, <a href="/search/cs?searchtype=author&query=Stap%2C+D">David Stap</a>, <a href="/search/cs?searchtype=author&query=Pathak%2C+E">Eshaan Pathak</a>, <a href="/search/cs?searchtype=author&query=Karamanolakis%2C+G">Giannis Karamanolakis</a>, <a href="/search/cs?searchtype=author&query=Lai%2C+H+G">Haizhi Gary Lai</a>, <a href="/search/cs?searchtype=author&query=Purohit%2C+I">Ishan Purohit</a>, <a href="/search/cs?searchtype=author&query=Mondal%2C+I">Ishani Mondal</a>, <a href="/search/cs?searchtype=author&query=Anderson%2C+J">Jacob Anderson</a>, <a href="/search/cs?searchtype=author&query=Kuznia%2C+K">Kirby Kuznia</a>, <a href="/search/cs?searchtype=author&query=Doshi%2C+K">Krima Doshi</a>, <a href="/search/cs?searchtype=author&query=Patel%2C+M">Maitreya Patel</a>, <a href="/search/cs?searchtype=author&query=Pal%2C+K+K">Kuntal Kumar Pal</a>, <a href="/search/cs?searchtype=author&query=Moradshahi%2C+M">Mehrad Moradshahi</a>, <a href="/search/cs?searchtype=author&query=Parmar%2C+M">Mihir Parmar</a>, <a href="/search/cs?searchtype=author&query=Purohit%2C+M">Mirali Purohit</a>, <a href="/search/cs?searchtype=author&query=Varshney%2C+N">Neeraj Varshney</a>, <a href="/search/cs?searchtype=author&query=Kaza%2C+P+R">Phani Rohitha Kaza</a> , et al. (15 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2204.07705v3-abstract-short" style="display: inline;"> How well can NLP models generalize to a variety of unseen tasks when provided with task instructions? To address this question, we first introduce Super-NaturalInstructions, a benchmark of 1,616 diverse NLP tasks and their expert-written instructions. Our collection covers 76 distinct task types, including but not limited to classification, extraction, infilling, sequence tagging, text rewriting,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.07705v3-abstract-full').style.display = 'inline'; document.getElementById('2204.07705v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2204.07705v3-abstract-full" style="display: none;"> How well can NLP models generalize to a variety of unseen tasks when provided with task instructions? To address this question, we first introduce Super-NaturalInstructions, a benchmark of 1,616 diverse NLP tasks and their expert-written instructions. Our collection covers 76 distinct task types, including but not limited to classification, extraction, infilling, sequence tagging, text rewriting, and text composition. This large and diverse collection of tasks enables rigorous benchmarking of cross-task generalization under instructions -- training models to follow instructions on a subset of tasks and evaluating them on the remaining unseen ones. Furthermore, we build Tk-Instruct, a transformer model trained to follow a variety of in-context instructions (plain language task definitions or k-shot examples). Our experiments show that Tk-Instruct outperforms existing instruction-following models such as InstructGPT by over 9% on our benchmark despite being an order of magnitude smaller. We further analyze generalization as a function of various scaling parameters, such as the number of observed tasks, the number of instances per task, and model sizes. We hope our dataset and model facilitate future progress towards more general-purpose NLP models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2204.07705v3-abstract-full').style.display = 'none'; document.getElementById('2204.07705v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to EMNLP 2022, 25 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.10610">arXiv:2202.10610</a> <span> [<a href="https://arxiv.org/pdf/2202.10610">pdf</a>, <a href="https://arxiv.org/format/2202.10610">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Knowledge Base Question Answering by Case-based Reasoning over Subgraphs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Das%2C+R">Rajarshi Das</a>, <a href="/search/cs?searchtype=author&query=Godbole%2C+A">Ameya Godbole</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Ankita Naik</a>, <a href="/search/cs?searchtype=author&query=Tower%2C+E">Elliot Tower</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+R">Robin Jia</a>, <a href="/search/cs?searchtype=author&query=Zaheer%2C+M">Manzil Zaheer</a>, <a href="/search/cs?searchtype=author&query=Hajishirzi%2C+H">Hannaneh Hajishirzi</a>, <a href="/search/cs?searchtype=author&query=McCallum%2C+A">Andrew McCallum</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.10610v2-abstract-short" style="display: inline;"> Question answering (QA) over knowledge bases (KBs) is challenging because of the diverse, essentially unbounded, types of reasoning patterns needed. However, we hypothesize in a large KB, reasoning patterns required to answer a query type reoccur for various entities in their respective subgraph neighborhoods. Leveraging this structural similarity between local neighborhoods of different subgraphs… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.10610v2-abstract-full').style.display = 'inline'; document.getElementById('2202.10610v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.10610v2-abstract-full" style="display: none;"> Question answering (QA) over knowledge bases (KBs) is challenging because of the diverse, essentially unbounded, types of reasoning patterns needed. However, we hypothesize in a large KB, reasoning patterns required to answer a query type reoccur for various entities in their respective subgraph neighborhoods. Leveraging this structural similarity between local neighborhoods of different subgraphs, we introduce a semiparametric model (CBR-SUBG) with (i) a nonparametric component that for each query, dynamically retrieves other similar $k$-nearest neighbor (KNN) training queries along with query-specific subgraphs and (ii) a parametric component that is trained to identify the (latent) reasoning patterns from the subgraphs of KNN queries and then apply them to the subgraph of the target query. We also propose an adaptive subgraph collection strategy to select a query-specific compact subgraph, allowing us to scale to full Freebase KB containing billions of facts. We show that CBR-SUBG can answer queries requiring subgraph reasoning patterns and performs competitively with the best models on several KBQA benchmarks. Our subgraph collection strategy also produces more compact subgraphs (e.g. 55\% reduction in size for WebQSP while increasing answer recall by 4.85\%)\footnote{Code, model, and subgraphs are available at \url{https://github.com/rajarshd/CBR-SUBG}}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.10610v2-abstract-full').style.display = 'none'; document.getElementById('2202.10610v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICML 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.05787">arXiv:2112.05787</a> <span> [<a href="https://arxiv.org/pdf/2112.05787">pdf</a>, <a href="https://arxiv.org/format/2112.05787">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Representation Learning for Conversational Data using Discourse Mutual Information Maximization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Santra%2C+B">Bishal Santra</a>, <a href="/search/cs?searchtype=author&query=Roychowdhury%2C+S">Sumegh Roychowdhury</a>, <a href="/search/cs?searchtype=author&query=Mandal%2C+A">Aishik Mandal</a>, <a href="/search/cs?searchtype=author&query=Gurram%2C+V">Vasu Gurram</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Atharva Naik</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+M">Manish Gupta</a>, <a href="/search/cs?searchtype=author&query=Goyal%2C+P">Pawan Goyal</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.05787v2-abstract-short" style="display: inline;"> Although many pretrained models exist for text or images, there have been relatively fewer attempts to train representations specifically for dialog understanding. Prior works usually relied on finetuned representations based on generic text representation models like BERT or GPT-2. But such language modeling pretraining objectives do not take the structural information of conversational text into… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.05787v2-abstract-full').style.display = 'inline'; document.getElementById('2112.05787v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.05787v2-abstract-full" style="display: none;"> Although many pretrained models exist for text or images, there have been relatively fewer attempts to train representations specifically for dialog understanding. Prior works usually relied on finetuned representations based on generic text representation models like BERT or GPT-2. But such language modeling pretraining objectives do not take the structural information of conversational text into consideration. Although generative dialog models can learn structural features too, we argue that the structure-unaware word-by-word generation is not suitable for effective conversation modeling. We empirically demonstrate that such representations do not perform consistently across various dialog understanding tasks. Hence, we propose a structure-aware Mutual Information based loss-function DMI (Discourse Mutual Information) for training dialog-representation models, that additionally captures the inherent uncertainty in response prediction. Extensive evaluation on nine diverse dialog modeling tasks shows that our proposed DMI-based models outperform strong baselines by significant margins. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.05787v2-abstract-full').style.display = 'none'; document.getElementById('2112.05787v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint, 15 pages, To appear in NAACL 2022 (Main)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2111.08374">arXiv:2111.08374</a> <span> [<a href="https://arxiv.org/pdf/2111.08374">pdf</a>, <a href="https://arxiv.org/format/2111.08374">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Literature-Augmented Clinical Outcome Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Naik%2C+A">Aakanksha Naik</a>, <a href="/search/cs?searchtype=author&query=Parasa%2C+S">Sravanthi Parasa</a>, <a href="/search/cs?searchtype=author&query=Feldman%2C+S">Sergey Feldman</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L+L">Lucy Lu Wang</a>, <a href="/search/cs?searchtype=author&query=Hope%2C+T">Tom Hope</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2111.08374v3-abstract-short" style="display: inline;"> We present BEEP (Biomedical Evidence-Enhanced Predictions), a novel approach for clinical outcome prediction that retrieves patient-specific medical literature and incorporates it into predictive models. Based on each individual patient's clinical notes, we train language models (LMs) to find relevant papers and fuse them with information from notes to predict outcomes such as in-hospital mortalit… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.08374v3-abstract-full').style.display = 'inline'; document.getElementById('2111.08374v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2111.08374v3-abstract-full" style="display: none;"> We present BEEP (Biomedical Evidence-Enhanced Predictions), a novel approach for clinical outcome prediction that retrieves patient-specific medical literature and incorporates it into predictive models. Based on each individual patient's clinical notes, we train language models (LMs) to find relevant papers and fuse them with information from notes to predict outcomes such as in-hospital mortality. We develop methods to retrieve literature based on noisy, information-dense patient notes, and to augment existing outcome prediction models with retrieved papers in a manner that maximizes predictive accuracy. Our approach boosts predictive performance on three important clinical tasks in comparison to strong recent LM baselines, increasing F1 by up to 5 points and precision@Top-K by a large margin of over 25%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.08374v3-abstract-full').style.display = 'none'; document.getElementById('2111.08374v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published at Findings of NAACL 2022. Extended Abstract presented at Machine Learning for Health (ML4H) symposium 2022, November 28th, 2022, New Orleans, United States & Virtual, http://www.ml4h.cc, 16 pages. Code available at: https://github.com/allenai/BEEP</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2111.01340">arXiv:2111.01340</a> <span> [<a href="https://arxiv.org/pdf/2111.01340">pdf</a>, <a href="https://arxiv.org/format/2111.01340">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Adapting to the Long Tail: A Meta-Analysis of Transfer Learning Research for Language Understanding Tasks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Naik%2C+A">Aakanksha Naik</a>, <a href="/search/cs?searchtype=author&query=Lehman%2C+J">Jill Lehman</a>, <a href="/search/cs?searchtype=author&query=Rose%2C+C">Carolyn Rose</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2111.01340v2-abstract-short" style="display: inline;"> Natural language understanding (NLU) has made massive progress driven by large benchmarks, but benchmarks often leave a long tail of infrequent phenomena underrepresented. We reflect on the question: have transfer learning methods sufficiently addressed the poor performance of benchmark-trained models on the long tail? We conceptualize the long tail using macro-level dimensions (e.g., underreprese… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.01340v2-abstract-full').style.display = 'inline'; document.getElementById('2111.01340v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2111.01340v2-abstract-full" style="display: none;"> Natural language understanding (NLU) has made massive progress driven by large benchmarks, but benchmarks often leave a long tail of infrequent phenomena underrepresented. We reflect on the question: have transfer learning methods sufficiently addressed the poor performance of benchmark-trained models on the long tail? We conceptualize the long tail using macro-level dimensions (e.g., underrepresented genres, topics, etc.), and perform a qualitative meta-analysis of 100 representative papers on transfer learning research for NLU. Our analysis asks three questions: (i) Which long tail dimensions do transfer learning studies target? (ii) Which properties of adaptation methods help improve performance on the long tail? (iii) Which methodological gaps have greatest negative impact on long tail performance? Our answers highlight major avenues for future research in transfer learning for the long tail. Lastly, using our meta-analysis framework, we perform a case study comparing the performance of various adaptation methods on clinical narratives, which provides interesting insights that may enable us to make progress along these future avenues. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.01340v2-abstract-full').style.display = 'none'; document.getElementById('2111.01340v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in TACL 2022. This is a pre-MIT Press publication version</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.13855">arXiv:2110.13855</a> <span> [<a href="https://arxiv.org/pdf/2110.13855">pdf</a>, <a href="https://arxiv.org/format/2110.13855">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Average-Reward Learning and Planning with Options </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wan%2C+Y">Yi Wan</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Abhishek Naik</a>, <a href="/search/cs?searchtype=author&query=Sutton%2C+R+S">Richard S. Sutton</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.13855v1-abstract-short" style="display: inline;"> We extend the options framework for temporal abstraction in reinforcement learning from discounted Markov decision processes (MDPs) to average-reward MDPs. Our contributions include general convergent off-policy inter-option learning algorithms, intra-option algorithms for learning values and models, as well as sample-based planning variants of our learning algorithms. Our algorithms and convergen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.13855v1-abstract-full').style.display = 'inline'; document.getElementById('2110.13855v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.13855v1-abstract-full" style="display: none;"> We extend the options framework for temporal abstraction in reinforcement learning from discounted Markov decision processes (MDPs) to average-reward MDPs. Our contributions include general convergent off-policy inter-option learning algorithms, intra-option algorithms for learning values and models, as well as sample-based planning variants of our learning algorithms. Our algorithms and convergence proofs extend those recently developed by Wan, Naik, and Sutton. We also extend the notion of option-interrupting behavior from the discounted to the average-reward formulation. We show the efficacy of the proposed algorithms with experiments on a continuing version of the Four-Room domain. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.13855v1-abstract-full').style.display = 'none'; document.getElementById('2110.13855v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.08927">arXiv:2109.08927</a> <span> [<a href="https://arxiv.org/pdf/2109.08927">pdf</a>, <a href="https://arxiv.org/format/2109.08927">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Weakly Supervised Explainable Phrasal Reasoning with Neural Fuzzy Logic </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zijun Wu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z+X">Zi Xuan Zhang</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Atharva Naik</a>, <a href="/search/cs?searchtype=author&query=Mei%2C+Z">Zhijian Mei</a>, <a href="/search/cs?searchtype=author&query=Firdaus%2C+M">Mauajama Firdaus</a>, <a href="/search/cs?searchtype=author&query=Mou%2C+L">Lili Mou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.08927v3-abstract-short" style="display: inline;"> Natural language inference (NLI) aims to determine the logical relationship between two sentences, such as Entailment, Contradiction, and Neutral. In recent years, deep learning models have become a prevailing approach to NLI, but they lack interpretability and explainability. In this work, we address the explainability of NLI by weakly supervised logical reasoning, and propose an Explainable Phra… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.08927v3-abstract-full').style.display = 'inline'; document.getElementById('2109.08927v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.08927v3-abstract-full" style="display: none;"> Natural language inference (NLI) aims to determine the logical relationship between two sentences, such as Entailment, Contradiction, and Neutral. In recent years, deep learning models have become a prevailing approach to NLI, but they lack interpretability and explainability. In this work, we address the explainability of NLI by weakly supervised logical reasoning, and propose an Explainable Phrasal Reasoning (EPR) approach. Our model first detects phrases as the semantic unit and aligns corresponding phrases in the two sentences. Then, the model predicts the NLI label for the aligned phrases, and induces the sentence label by fuzzy logic formulas. Our EPR is almost everywhere differentiable and thus the system can be trained end to end. In this way, we are able to provide explicit explanations of phrasal logical relationships in a weakly supervised manner. We further show that such reasoning results help textual explanation generation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.08927v3-abstract-full').style.display = 'none'; document.getElementById('2109.08927v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ICLR 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2105.07314">arXiv:2105.07314</a> <span> [<a href="https://arxiv.org/pdf/2105.07314">pdf</a>, <a href="https://arxiv.org/format/2105.07314">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> STAGE: Tool for Automated Extraction of Semantic Time Cues to Enrich Neural Temporal Ordering Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Breitfeller%2C+L">Luke Breitfeller</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Aakanksha Naik</a>, <a href="/search/cs?searchtype=author&query=Rose%2C+C">Carolyn Rose</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2105.07314v1-abstract-short" style="display: inline;"> Despite achieving state-of-the-art accuracy on temporal ordering of events, neural models showcase significant gaps in performance. Our work seeks to fill one of these gaps by leveraging an under-explored dimension of textual semantics: rich semantic information provided by explicit textual time cues. We develop STAGE, a system that consists of a novel temporal framework and a parser that can auto… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.07314v1-abstract-full').style.display = 'inline'; document.getElementById('2105.07314v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2105.07314v1-abstract-full" style="display: none;"> Despite achieving state-of-the-art accuracy on temporal ordering of events, neural models showcase significant gaps in performance. Our work seeks to fill one of these gaps by leveraging an under-explored dimension of textual semantics: rich semantic information provided by explicit textual time cues. We develop STAGE, a system that consists of a novel temporal framework and a parser that can automatically extract time cues and convert them into representations suitable for integration with neural models. We demonstrate the utility of extracted cues by integrating them with an event ordering model using a joint BiLSTM and ILP constraint architecture. We outline the functionality of the 3-part STAGE processing approach, and show two methods of integrating its representations with the BiLSTM-ILP model: (i) incorporating semantic cues as additional features, and (ii) generating new constraints from semantic cues to be enforced in the ILP. We demonstrate promising results on two event ordering datasets, and highlight important issues in semantic cue representation and integration for future research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.07314v1-abstract-full').style.display = 'none'; document.getElementById('2105.07314v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 May, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2105.03983">arXiv:2105.03983</a> <span> [<a href="https://arxiv.org/pdf/2105.03983">pdf</a>, <a href="https://arxiv.org/format/2105.03983">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3404835.3463080">10.1145/3404835.3463080 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Understanding the Role of Affect Dimensions in Detecting Emotions from Tweets: A Multi-task Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mukherjee%2C+R">Rajdeep Mukherjee</a>, <a href="/search/cs?searchtype=author&query=Naik%2C+A">Atharva Naik</a>, <a href="/search/cs?searchtype=author&query=Poddar%2C+S">Sriyash Poddar</a>, <a href="/search/cs?searchtype=author&query=Dasgupta%2C+S">Soham Dasgupta</a>, <a href="/search/cs?searchtype=author&query=Ganguly%2C+N">Niloy Ganguly</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2105.03983v1-abstract-short" style="display: inline;"> We propose VADEC, a multi-task framework that exploits the correlation between the categorical and dimensional models of emotion representation for better subjectivity analysis. Focusing primarily on the effective detection of emotions from tweets, we jointly train multi-label emotion classification and multi-dimensional emotion regression, thereby utilizing the inter-relatedness between the tasks… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.03983v1-abstract-full').style.display = 'inline'; document.getElementById('2105.03983v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2105.03983v1-abstract-full" style="display: none;"> We propose VADEC, a multi-task framework that exploits the correlation between the categorical and dimensional models of emotion representation for better subjectivity analysis. Focusing primarily on the effective detection of emotions from tweets, we jointly train multi-label emotion classification and multi-dimensional emotion regression, thereby utilizing the inter-relatedness between the tasks. Co-training especially helps in improving the performance of the classification task as we outperform the strongest baselines with 3.4%, 11%, and 3.9% gains in Jaccard Accuracy, Macro-F1, and Micro-F1 scores respectively on the AIT dataset. We also achieve state-of-the-art results with 11.3% gains averaged over six different metrics on the SenWave dataset. For the regression task, VADEC, when trained with SenWave, achieves 7.6% and 16.5% gains in Pearson Correlation scores over the current state-of-the-art on the EMOBANK dataset for the Valence (V) and Dominance (D) affect dimensions respectively. We conclude our work with a case study on COVID-19 tweets posted by Indians that further helps in establishing the efficacy of our proposed solution. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.03983v1-abstract-full').style.display = 'none'; document.getElementById('2105.03983v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 May, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, Short Paper accepted at SIGIR 2021</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.7; J.4 </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Proceedings of the 44th International ACM SIGIR Conference on Research and Development in Information Retrieval, SIGIR 2021 </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Naik%2C+A&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Naik%2C+A&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Naik%2C+A&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository